From cb8b710ee7b18f7e7464d1796176ba2787aa4ed4 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 15:30:24 +0000 Subject: [PATCH 01/98] docs(adr): propose native MCP client for openab-agent Adds ADR for in-core rmcp + progressive-disclosure meta-tool, deferred to symmetry with the Skills extension pattern from PR #955. Memory analysis rules out the sidecar alternative; per-session config refresh replaces file-watcher hot reload to drop ~150 LOC of race-condition hotspot. Co-Authored-By: Claude Opus 4.7 --- docs/adr/openab-agent-mcp.md | 605 +++++++++++++++++++++++++++++++++++ 1 file changed, 605 insertions(+) create mode 100644 docs/adr/openab-agent-mcp.md diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md new file mode 100644 index 000000000..bd078f604 --- /dev/null +++ b/docs/adr/openab-agent-mcp.md @@ -0,0 +1,605 @@ +# ADR: openab-agent — MCP Client Support + +## 1. Context & Motivation + +`openab-agent` is the native Rust coding agent shipped with OpenAB (Cargo workspace member `openab-agent/`, introduced 2026-05-26 via PR #924, targeted at the v0.8.4-beta series). Its `docs/adr/openab-agent.md` charter commits to a small surface: 4 built-in tools (`read`, `write`, `edit`, `bash`), a ~500-token system prompt, no LLM SDK dependency, multi-model via thin HTTP. PR #955 added `Skills` support (`openab-agent/src/skills.rs`, 224 LOC, zero new crate dependencies) as the first extension mechanism — descriptor-only injection plus on-demand load via the existing `read` tool. + +The agent currently has **no MCP (Model Context Protocol) client**. This ADR proposes one. + +### 1.1 Why MCP for openab-agent + +- **Ecosystem leverage.** Every Postgres/GitHub/Figma/Jira/Slack integration users will ask for already exists as an MCP server (mcpbundles.com tracks ~9k tools across ~1.4k providers as of 2026-Q2). Re-implementing each as a Skill or built-in tool is duplicative. 
+- **Parity with peer agents.** Claude Code, Codex CLI, Cursor, Cline, Goose, opencode, OpenHands, Kiro, Junie, Roo Code all ship MCP clients. Users coming from any of these expect `mcpServers` config to "just work". +- **Skills cannot replace MCP.** Per Anthropic's framing — **Skills = procedural (how to do); MCP = connectivity (where data/tools live)**. Skills wrap CLI tools; MCP handles network, auth, streaming, server-side state. + +### 1.2 Why now + +Skills landed in PR #955. The repo's design pattern for "first-tier-but-tiny" extension is now established. MCP is the natural next layer. + +### 1.3 Prior internal attempts + +Four MCP PRs to upstream `openabdev/openab` have closed without merging: + +| PR | Title | State | Scope | +|---|---|---|---| +| #329, #330 | `feat(mcp): inject per-user MCP servers from Discord profiles into ACP sessions` | CLOSED | Broker forward | +| #345 | `feat: inject per-user MCP servers into ACP sessions` | CLOSED | Broker forward | +| #903 | `feat(agent): forward configured MCP servers` | CLOSED | Broker forward | + +All four targeted the broker layer — pass MCP server config through to the backing CLI (Claude Code / Codex / Cursor) and let that CLI handle MCP. **None addressed native MCP support inside `openab-agent` itself.** This ADR is scoped to the native agent. + +Issue #753 remains open and is broker-side (`[agent].inherit_cloud_mcp_servers` opt-out). This ADR does not change broker behavior. + +--- + +## 2. Goals & Non-Goals + +### In scope + +- MCP **client** support inside `openab-agent` +- Transports: stdio (local servers — Anthropic reference, npm/pypi community) and Streamable HTTP (vendor-hosted SaaS — Atlassian, Figma, Linear, Notion, Sentry, etc.). HTTP+SSE intentionally omitted — superseded by MCP spec 2025-11-25 and actively sunset by vendors (Atlassian deadline 2026-06-30). See §3.8 for landscape. 
+- OAuth login flow for MCP servers requiring it +- Per-session lifecycle with idle eviction +- Per-session config refresh — new ACP session re-reads `mcpServers` from disk (no file watcher, no mid-session reload; openab spawns short-lived sessions per thread so process restart is rarely needed) +- Progressive-disclosure tool surface (single meta-tool, not flat fan-out) +- Reuse of existing `src/auth.rs` PKCE / TokenStore where possible + +### Out of scope + +- MCP **server** functionality (host only) +- WASM / cdylib plugin runtime +- Sidecar / out-of-process MCP bridge +- Per-thread MCP isolation (broker concern, not agent) +- Replacing Skills (Skills and MCP coexist) + +--- + +## 3. Prior Art Survey + +Per `docs/adr/pr-contribution-guidelines.md`, OpenClaw and Hermes Agent are the mandatory references for architectural PRs. OpenClaw was evaluated and found **not applicable to this ADR**: it is a multi-channel messaging gateway (chat platforms ↔ MCP), not a coding agent. Its substantial MCP code (~2,900 LOC across `src/agents/mcp-*`, `src/config/mcp-*`, `src/mcp/`) addresses channel bridging rather than agent-side tool calling. The closer comparison for a coding-agent MCP client is **opencode (§3.2)**, included in addition to Hermes Agent. + +Five projects are surveyed below. 
Each contributes a design pattern the chosen architecture borrows from: + +| § | Project | Borrowed pattern | +|---|---|---| +| 3.1 | Hermes Agent | Circuit breaker (per-server fail threshold + cooldown) | +| 3.2 | opencode | Per-server status enum + RFC 7591 dynamic OAuth | +| 3.3 | pi-mcp-adapter | Single `mcp` meta-tool with action dispatch (progressive disclosure) | +| 3.4 | Goose | MCP-as-primary-extension validation in a Rust codebase | +| 3.5 | OpenHands | `filter_tools_regex` per-server tool scoping | + +### 3.1 Hermes Agent (mandatory reference) + +- Repo: https://github.com/NousResearch/hermes-agent (Python, Apache 2.0) +- MCP module: ~5,175 LOC across 3 files (`mcp_tool.py` + 2 OAuth modules) +- SDK: official `mcp==1.26.0` +- Transports: stdio + Streamable HTTP + SSE +- Tool naming: `mcp_{server}_{tool}` (single-underscore separators, no `__` boundary marker) +- Lifecycle: per-server long-lived `asyncio.Task` on dedicated background event loop +- Lazy loading: eager connect, but background-thread discovery with 0.75s join — non-blocking +- Hot reload: mtime-poll on `~/.hermes/config.yaml` + `/reload-mcp` slash command +- OAuth: mtime-based disk-watch for cross-process token refresh +- **Notable**: ships a real circuit breaker — threshold 3 failures / 60s cooldown / half-open probe state. The only project surveyed that does so. 
+ +### 3.2 opencode (anomalyco/opencode, formerly sst/opencode) + +- Repo: https://github.com/anomalyco/opencode (TypeScript, MIT) — `sst/opencode` 301-redirects here after org transfer +- **Closest coding-agent comparison to openab-agent** +- MCP module: ~1,664 LOC across 5 files (`mcp/`, `auth.ts`, OAuth provider/callback, config) +- SDK: `@modelcontextprotocol/sdk@1.27.1` +- Transports: stdio + Streamable HTTP + SSE +- Tool naming: `{sanitized_client}_{sanitized_tool}` (single underscore) +- Lifecycle: shared singleton service via Effect `Layer`; one `Client` per server +- Lazy loading: eager connect with `concurrency: "unbounded"`; per-server status union prevents one bad server from crashing others +- Hot reload: subscribes to MCP spec's `notifications/tools/list_changed`; **no file watcher** for config — config change still requires restart +- OAuth: RFC 7591 dynamic client registration, callback `http://127.0.0.1:19876/mcp/oauth/callback`, EffectFlock cross-process locking on token store +- **Known issues** (cited as architectural cautionary tales): #11868 (113 GB virtual-memory leak, Windows v1.1.21), #7261 (heap not released + MCP orphan processes, v1.1.6), #13041 (per-session MCP+LSP duplication across concurrent sessions) — all rooted in child-process lifecycle, not protocol code + +### 3.3 pi-mcp-adapter + +- Repo: https://github.com/nicobailon/pi-mcp-adapter (TypeScript, MIT) +- An out-of-tree extension for the Pi coding agent (`pi.extensions`) — Pi itself has **no native MCP** +- MCP module: ~3,661 LOC (server-manager, proxy-modes, direct-tools, OAuth) +- SDK: `@modelcontextprotocol/sdk@^1.25.1` + `@modelcontextprotocol/ext-apps@^1.2.2` +- Transports: stdio + Streamable HTTP + SSE +- **Notable — the reason this is cited**: ships a **single `mcp` meta-tool** with sub-actions (`connect`, `describe`, `search`, `list`, `call`, `status`). All MCP capability is exposed through this one tool. 
Lazy connect happens inside `lazyConnect()` on first action that needs it. This is the **progressive-disclosure pattern** this ADR adopts. + +### 3.4 Goose (block / aaif-goose) + +- Repo: https://github.com/block/goose → https://github.com/aaif-goose/goose (Rust, Apache 2.0) +- **Most relevant precedent: a Rust coding agent built around MCP** +- Launched Jan 2025 with MCP as the *only* extension surface (no first-party plugin API to retrofit) +- Hand-rolled `mcp-client` crate (predated official Rust SDK) +- Per-session `Agent` owns an `ExtensionManager` that spawns MCP servers (stdio/SSE) as child processes +- Tools flattened into one namespace; extension name used as prefix to avoid collisions +- Supports `tools/list_changed` for hot reload +- Precedent for a Rust agent shipping MCP as the primary extension surface without WASM / cdylib / sidecar plumbing. + +### 3.5 OpenHands (All-Hands-AI) + +- Repo: https://github.com/OpenHands/OpenHands (Python, MIT) +- SDK: FastMCP (jlowin/fastmcp), not the reference SDK +- **Notable**: per-agent `filter_tools_regex` config — subset a server's tools without modifying the server. OAuth tokens cached under `~/.fastmcp/oauth-mcp-client-cache/` with auto-refresh; explicit "incompatible with headless" caveat for browser-based auth. +- Cited for OAuth + tool-surface scoping patterns where Hermes/opencode/Pi are weaker. 
+ +### 3.6 Comparison matrix + +| | Hermes | opencode | pi-mcp-adapter | Goose | OpenHands | +|---|---|---|---|---|---| +| Language | Python | TS | TS | Rust | Python | +| SDK | mcp 1.26 | sdk 1.27 | sdk 1.25 | hand-rolled | FastMCP | +| Transports | stdio+HTTP+SSE | stdio+HTTP+SSE | stdio+HTTP+SSE | stdio+SSE | stdio+HTTP | +| Tool naming | `mcp_s_t` | `s_t` | configurable | ext-prefix | filter | +| Lifecycle | per-srv task | shared singleton | per-ext + idle 10m | per-session ExtensionMgr | per-agent | +| Lazy connect | no | no | ✅ meta | no (eager) | no | +| Hot reload | mtime+cmd | `tools/list_changed` | session boundary | `tools/list_changed` | no | +| OAuth | mtime disk-watch | RFC7591 + Flock | PKCE+auto | ? | FastMCP cache | +| Circuit breaker | ✅ 3/60s | no | partial | no | no | +| LOC | ~5,175 | ~1,664 | ~3,661 | unmeasured | unmeasured | + +### 3.7 Skills vs MCP — industry research + +Anthropic positions the two as **complementary**, not competing. The 2025-2026 consensus across practitioner blogs (Simon Willison, Anthropic engineering, StackOne) converged on: + +``` + Skills MCP + ────── ──── + Procedural knowledge Live connectivity + Markdown + YAML frontmatter Protocol spec + SDK + ~100 tokens/skill in prompt 10K-17K tokens/server in prompt + Body lazy-loaded via read tool All tool schemas eagerly loaded + Local file Server (process or HTTP endpoint) + No auth, no lifecycle OAuth, lifecycle, transports + Open standard (Dec 2025) Linux Foundation steward (late 2025) +``` + +**Adoption**: no major OSS coding agent has rejected MCP in favor of Skills-only (or vice versa). All 11 surveyed agents (Claude Code, Codex CLI, Gemini CLI, Cursor, Cline, Goose, opencode, Junie, Kiro, Roo, GitHub Copilot agent-mode) support both. 
+ +**Cost data**: large MCP server collections have been documented consuming substantial context budget — StackOne benchmarks Sonnet 4.6 at 42% tool-selection accuracy on the unmodified MCP surface vs 80% with their Code Mode wrapper, motivating the spec-level fix in MCP SEP-1576 ("Mitigating Token Bloat in MCP") which proposes progressive disclosure (**not yet ratified**). + +**Implication for this ADR**: progressive disclosure is not optional for openab-agent. The agent's design principle commits to a ~500-token system prompt; a naïve flat MCP integration would 30× that budget. Skills' descriptor-only injection pattern is the precedent. + +### 3.8 Transport landscape & SaaS MCP server adoption + +MCP defines three transport profiles. Their 2026-Q2 status: + +| Transport | Spec status | Where it lives | +|---|---|---| +| **stdio** | Stable | Local child process — Anthropic reference servers, npm/pypi community packages | +| **Streamable HTTP** | Current (MCP spec 2025-11-25), supersedes HTTP+SSE | Vendor-hosted SaaS endpoints | +| **HTTP+SSE** | Deprecated by spec 2025-11-25; vendor sunsets in progress | Legacy fixtures — Atlassian sunsets 2026-06-30 | + +``` + ┌──────────────────────────────── MCP Server Universe ─────────────────────────────────┐ + │ │ + │ ┌─────────────────────────────┐ ┌────────────────────────────────────┐ │ + │ │ LOCAL (majority of registry) │ │ REMOTE (vendor SaaS, growing) │ │ + │ │ │ │ │ │ + │ │ Transport: stdio │ │ Transport: Streamable HTTP │ │ + │ │ │ │ │ │ + │ │ filesystem sqlite │ │ Atlassian Figma Linear │ │ + │ │ postgres puppeteer │ │ Notion Sentry Supabase │ │ + │ │ github fetch │ │ HubSpot Slack Stripe │ │ + │ │ time gitlab │ │ Cloudflare Vercel Neon ... │ │ + │ │ ... 
│ │ │ │ + │ └─────────────────────────────┘ └────────────────────────────────────┘ │ + │ │ + │ ┌────────────────────────────────────────────────────────────────────────────┐ │ + │ │ LEGACY (deprecated, vendor sunsets in progress) │ │ + │ │ Transport: HTTP+SSE │ │ + │ │ e.g. Atlassian https://mcp.atlassian.com/v1/sse (off 2026-06-30) │ │ + │ └────────────────────────────────────────────────────────────────────────────┘ │ + │ │ + └──────────────────────────────────────────────────────────────────────────────────────┘ +``` + +#### Local stdio servers (representative sample) + +Anthropic reference + community packages. All ship as `command + args`; no network endpoint. + +| Server | Implementation | Distribution | +|---|---|---| +| `mcp-server-filesystem` | Node | `@modelcontextprotocol/server-filesystem` (npm) | +| `mcp-server-sqlite` | Python | `mcp-server-sqlite` (pypi) | +| `mcp-server-postgres` | Python | `mcp-server-postgres` (pypi) — local DB | +| `mcp-server-puppeteer` | Node | `@modelcontextprotocol/server-puppeteer` (npm) | +| `mcp-server-github` | Go / Node | `github-mcp-server` (binary) / `@modelcontextprotocol/server-github` (npm) | +| `mcp-server-fetch` | Python | `mcp-server-fetch` (pypi) | +| `mcp-server-time` | Rust | `mcp-server-time` (cargo) | +| `mcp-server-gitlab` | Node | `@modelcontextprotocol/server-gitlab` (npm) | + +#### Vendor-hosted SaaS servers — all Streamable HTTP + +Survey of mainstream public endpoints (2026-Q2). Every active vendor endpoint surveyed is Streamable HTTP. The Atlassian SSE URL is the lone holdout and has a published sunset date. 
+ +| Vendor | Endpoint | Transport | Notes | +|---|---|---|---| +| Atlassian (Rovo) | `https://mcp.atlassian.com/v1/mcp` | Streamable HTTP | Legacy SSE at `/v1/sse` sunset **2026-06-30** | +| Figma | `https://mcp.figma.com/mcp` | Streamable HTTP | OAuth via Figma account | +| Linear | `https://mcp.linear.app/mcp` | Streamable HTTP | OAuth | +| Notion | `https://mcp.notion.com/mcp` | Streamable HTTP | OAuth | +| Sentry | `https://mcp.sentry.dev/mcp` | Streamable HTTP | OAuth | +| Supabase | `https://mcp.supabase.com/mcp` | Streamable HTTP | OAuth | +| HubSpot | `https://mcp.hubspot.com/anthropic` | Streamable HTTP | OAuth | +| Slack | (vendor-hosted) | Streamable HTTP | OAuth | +| Stripe | hosted (see Stripe MCP docs for current path) | Streamable HTTP | API key | +| Cloudflare | multiple endpoints under `*.mcp.cloudflare.com` | Streamable HTTP | OAuth (workers/dns/r2/...) | +| Vercel | `https://mcp.vercel.com/` | Streamable HTTP | OAuth | +| Neon | `https://mcp.neon.tech/` | Streamable HTTP | OAuth | + +**Cover map**: stdio + Streamable HTTP covers all mainstream public MCP endpoints surveyed as of 2026-Q2. SSE-only deployments are legacy fixtures with vendor sunsets in progress; deferred to a hypothetical v2. + +--- + +## 4. Design Decision + +### 4.1 Architectural alternatives compared + +**Alternative A — Naïve flat in-core.** Every MCP tool from every connected server becomes a top-level entry in `tool_definitions()`. Surface explodes from 4 → 150+ tools; system prompt grows ~500 → ~17,000 tokens (5 servers × ~20 tools each, ~160 tokens per descriptor). Hermes Agent and opencode both pay this cost; StackOne benchmarks (§3.7) show tool-selection accuracy drops sharply under naïve flat surfaces. + +**Alternative B — Sidecar / plugin process.** Spawn a separate `openab-mcp-bridge` binary; agent core has no `rmcp` dependency; communicate via stdio JSON-RPC. 
RAM saving is 1-2 MB on a 15-40 MB baseline — noise — but the bridge process itself adds ~15 MB and inherits opencode's documented sidecar failure modes (#11868 113 GB leak / #7261 orphan processes / #13041 per-session duplication). Cost ≫ benefit (see §7). + +**Alternative C — CHOSEN: in-core `rmcp` + progressive-disclosure meta-tool.** `rmcp` enters `Cargo.toml`. Tool surface grows by exactly **1 tool**: `mcp`. All MCP capability (server enumeration, tool discovery, invocation, status) flows through that single tool's `action` field. System prompt grows ~500 → ~600 tokens (+100 for the meta-tool blurb). + +### 4.2 Why C honors openab-agent design principles + +| Principle (`docs/adr/openab-agent.md` §2) | A (flat) | B (sidecar) | **C (chosen)** | +|---|:---:|:---:|:---:| +| Minimal tool surface (4 tools) | ⛔ 150+ | ✅ 4 | ✅ 5 | +| Tiny system prompt (~500 tokens) | ⛔ ~17K | ✅ ~500 | ⚠️ ~600 (+100 over budget; accepted as smallest viable surface) | +| No SDK dependency | ⛔ rmcp | ✅ none | ⚠️ rmcp (+1-2 MB binary, see §7) | +| Multi-model | ✅ | ✅ | ✅ | + +C concedes the "no SDK dependency" principle for a 1-2 MB binary cost. §7 shows that cost is dominated by child-process RAM (5-80 MB per server, depending on implementation language) regardless of architecture, so the concession is dwarfed by usage cost. + +### 4.3 Symmetry with Skills (PR #955) + +Skills is openab's existing "first-tier-but-tiny" extension. 
The mapping is exact: + +``` +┌────────────────────────────┬─────────────────────────────────────┐ +│ Skills (224 LOC, in-core) │ MCP (proposed, in-core) │ +├────────────────────────────┼─────────────────────────────────────┤ +│ Inject metadata only │ Inject 1 meta-tool only │ +│ (name + description) │ (name + action sketch) │ +├────────────────────────────┼─────────────────────────────────────┤ +│ Body load via `read` tool │ Server connect via `mcp` tool │ +│ on agent's demand │ on agent's demand (lazy connect) │ +├────────────────────────────┼─────────────────────────────────────┤ +│ ~100 tokens / 10 skills │ ~100 tokens / N servers │ +├────────────────────────────┼─────────────────────────────────────┤ +│ No new crate dep │ Adds rmcp (1-2 MB binary delta) │ +└────────────────────────────┴─────────────────────────────────────┘ +``` + +Skills' authors weighed "simple in-core mechanism vs plugin abstraction" and chose in-core. The same trade-off applies to MCP: plugin abstraction is ~10× the complexity for negligible RAM saving. + +--- + +## 5. Detailed Design + +### 5.1 Tool surface (4 + 1) + +``` +openab-agent/src/tools.rs::tool_definitions() returns 5 entries: + + [ "read" ] ─── existing, unchanged + [ "write" ] ─── existing, unchanged + [ "edit" ] ─── existing, unchanged + [ "bash" ] ─── existing, unchanged + [ "mcp" ] ─── NEW +``` + +### 5.2 The `mcp` meta-tool schema + +```jsonc +{ + "name": "mcp", + "description": "Interact with configured MCP servers. 
Use action='help' for usage.", + "input_schema": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["help", "list_servers", "list_tools", + "describe_tool", "call", "status"] + }, + "server": { "type": "string" }, + "tool": { "type": "string" }, + "arguments": { "type": "object" } + }, + "required": ["action"] + } +} +``` + +Per-action contract: + +| action | required fields | returns | +|---|---|---| +| `help` | — | usage doc string | +| `list_servers` | — | `[{ name, status, transport, tools_count }]` | +| `list_tools` | `server` | `[{ name, description }]` | +| `describe_tool` | `server`, `tool` | `{ name, description, input_schema }` | +| `call` | `server`, `tool`, `arguments` | tool's `CallToolResult` | +| `status` | `server?` | per-server health / last error / OAuth state | + +### 5.3 Agent loop interaction + +Typical multi-turn usage (lazy connect at first need, idle eviction after TTL): + +- **Turn 1** — LLM calls `mcp(action: "list_servers")`; no IO, served from config cache. Returns `[{ name: "github", status: "disconnected", transport: "stdio", ... }, ...]`. +- **Turn 2** — LLM calls `mcp(action: "list_tools", server: "github")`; `lazy_connect("github")` spawns child proc, `peer.list_all_tools()` fetches descriptors. Returns `[{name, description}, ...]`. +- **Turn 3** — LLM calls `mcp(action: "call", server, tool, arguments)`; `peer.call_tool()` invokes. Returns `CallToolResult`. +- **Idle (no MCP call for `idle_ttl`)** — `IdleEvictor` shuts down child proc, drops Peer; config + descriptor cache retained for fast re-connect. 
+ +### 5.4 Module layout + +``` +openab-agent/src/ +├── agent.rs (existing — add 1 match arm in execute_tool) +├── auth.rs (existing — TokenStore reused by mcp/oauth.rs) +├── llm.rs (existing — UNCHANGED, ToolDef is already generic) +├── tools.rs (existing — add `mcp` to tool_definitions()) +├── skills.rs (existing — UNCHANGED) +└── mcp/ (NEW module) + ├── mod.rs (public: McpRuntimeManager, dispatch()) + ├── config.rs (mcpServers schema, ${env:VAR} interpolation) + ├── runtime.rs (per-server lifecycle, lazy connect, idle TTL) + ├── meta_tool.rs (action dispatch: list_servers / list_tools / ...) + ├── oauth.rs (uses src/auth.rs TokenStore; built-in providers) + └── breaker.rs (circuit breaker per server) +``` + +Estimated total: **500-750 LOC** (no `reload.rs`; per-session refresh handled by `McpRuntimeManager::new()` re-reading config at session start). `llm.rs` is unchanged because both Anthropic and OpenAI Responses providers consume the generic `ToolDef` abstraction. + +### 5.5 `rmcp` dependency & features + +```toml +# openab-agent/Cargo.toml +[dependencies] +rmcp = { version = "1.7", default-features = false, features = [ + "client", + "transport-child-process", + "transport-streamable-http-client-reqwest", + "auth", +] } +``` + +- `client` only — we host nothing +- `transport-child-process` — stdio servers (majority of registry, see §3.8) +- `transport-streamable-http-client-reqwest` — vendor-hosted SaaS endpoints (reqwest is already a transitive dep) +- `auth` — OAuth helpers +- `default-features = false` — avoid pulling SSE / server features we don't need (SSE intentionally omitted per §3.8 — superseded by Streamable HTTP in MCP spec 2025-11-25, all surveyed vendors migrated or migrating) + +Binary delta estimate: **+1-2 MB** (see §7). + +### 5.6 Config schema + +Single root key `mcpServers` to match Claude Code / Codex / Cursor / Cline convention. 
Loaded from `.openab/agent/mcp.json` (project) and `~/.openab/agent/mcp.json` (global), project-local takes precedence on name collision. + +```jsonc +{ + "mcpServers": { + "github": { + "type": "stdio", + "command": "github-mcp-server", + "args": ["--repo-token", "${env:GITHUB_TOKEN}"], + "env": { "GH_HOST": "github.com" } + }, + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + }, + "fs": { + "type": "stdio", + "command": "mcp-server-filesystem", + "args": ["/workspace"], + "tool_filter": { "include": ["read_*", "list_*"] } + } + } +} +``` + +- `${env:VAR}` interpolation matches Cursor / Cline; missing var = startup error for that server (others continue) +- `tool_filter` supports `include` / `exclude` glob lists (lifted from OpenHands' `filter_tools_regex`) +- Per-server failure isolated — one bad server does not block agent boot + +### 5.7 Lifecycle + +``` + ┌─────────────────────────────────────┐ + │ McpRuntimeManager (1 per agent) │ + │ │ + │ config: Arc │ + │ servers: Map │ + │ idle_ttl: Duration (default 10m) │ + └─────────────────────────────────────┘ + │ + │ on first call needing server X: + ▼ + ┌─────────────────────────────────────┐ + │ ServerHandle (lazy) │ + │ │ + │ state: Disconnected | Connecting | │ + │ Connected(Peer) | Failed | │ + │ NeedsAuth │ + │ last_used: Instant │ + │ breaker: CircuitBreaker │ + │ tools_cache: Vec │ + └─────────────────────────────────────┘ + │ + ┌─────────────────┼─────────────────┐ + │ │ + ┌───────────┐ ┌───────────┐ + │ child proc│ │ HTTP conn │ + │ (stdio) │ │ (reqwest) │ + └───────────┘ └───────────┘ +``` + +- **Lazy connect**: server is `Disconnected` at boot; transitions to `Connecting → Connected` on first action needing it +- **Idle eviction**: background task evicts servers idle > `idle_ttl` (default 10m, configurable per server). 
State drops to `Disconnected`; tools cache retained for fast re-connect +- **No per-thread isolation**: agent is single-thread-per-session; openab broker handles thread-level concurrency upstream +- **Connection reuse**: while connected, all `mcp call` actions reuse the same `Peer` + +### 5.8 Config refresh model + +Rather than file-watching mid-session, openab-agent re-reads `mcp.json` at session boundaries: + +- **New ACP session** → `McpRuntimeManager::new()` parses `mcp.json` from scratch; ~5 LOC of glue, zero hot-path code +- **Mid-session config edit** → not visible until next session; users re-open the Discord/Slack thread (cheap in openab's per-thread session model) +- **Process restart** → applies config changes globally; rarely needed because broker spawns short-lived agent processes per session + +This drops `notify` crate + lease counter + diff applier (~150 LOC, race-condition hotspot) for an 80% UX coverage. Hermes' `/reload-mcp` slash command (§3.1) is the precedent for "explicit user-triggered reload >> implicit file watcher" in a coding-agent context. + +### 5.9 Error isolation & circuit breaker + +Adopted from Hermes Agent (the only surveyed project that ships one): + +``` + ┌─────────────────────────────────────────┐ + │ CircuitBreaker (per server) │ + │ │ + │ state: Closed | Open | HalfOpen │ + │ fail_threshold: 3 (configurable) │ + │ cooldown: 60s (configurable) │ + └─────────────────────────────────────────┘ + │ + ┌───────────────────────┼───────────────────────┐ + │ │ │ + ▼ ▼ ▼ + 3 fails in 30s 60s elapsed 1 success + ─────────────► ─────────────► ─────────────► + Closed → Open Open → HalfOpen HalfOpen → Closed + (allow 1 probe) + │ + │ probe fails + ▼ + HalfOpen → Open + (reset cooldown) +``` + +While `Open`, `mcp call` returns `{"error":"server unavailable, cooldown 45s remaining"}` immediately — no child-process resurrection attempts, no LLM hang. 
+ +`rmcp` error model maps cleanly: + +| `rmcp` error | meta-tool response | Counts toward breaker? | +|---|---|---| +| `ServiceError::McpError` (protocol) | `{ error: msg, code }` | No (server-level intent) | +| `ServiceError::TransportSend/Closed` | `{ error: "transport", server: ... }` | Yes | +| `CallToolResult { isError: true }` | passed through as result | No (tool-level) | + +--- + +## 6. OAuth + +### 6.1 Shared TokenStore + +`openab-agent/src/auth.rs` already implements hand-rolled PKCE for Codex (`CODEX_AUTHORIZE_URL`, port 1455). The TokenStore (`~/.openab/agent/auth.json`, 0o600) is reused — `mcp/oauth.rs` calls into the same store with namespaced keys (`mcp:` vs `codex`). + +### 6.2 Built-in providers (Phase 2) + +| Provider | Auth URL | Token URL | Callback | Scopes | +|---|---|---|---|---| +| `anthropic-mcp` | `https://claude.ai/oauth/authorize` | `https://platform.claude.com/v1/oauth/token` | `localhost:53692/callback` | `org:create_api_key user:profile user:inference user:sessions:claude_code user:mcp_servers user:file_upload` (subset varies per use) | +| `github-copilot` | (existing pi/anthropic flow) | existing | existing | existing | +| `generic` | from `mcpServers[name].oauth.authorize_url` | from `.oauth.token_url` | dynamically allocated port | from `.oauth.scopes` | + +### 6.3 Custom provider extension point + +Config can declare `oauth: { authorize_url, token_url, client_id, scopes }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. + +--- + +## 7. Memory Impact Analysis + +Included because the sidecar alternative (§4.1 B) was motivated by memory. + +`openab-agent` baseline is 15-40 MB RSS. `rmcp` with the §5.5 feature set adds +1-2 MB binary delta and +0 MB idle RSS (no servers configured). Once servers connect, child processes dominate: Go ~10-20 MB, Rust ~5-10 MB, Python/Node ~30-80 MB each. + +| Aspect | A. Naïve flat | B. 
Sidecar | **C. In-core + meta-tool** | +|---|---|---|---| +| Idle RAM delta | +1-2 MB | +0 MB | +1-2 MB | +| Per-server RAM | +5-80 MB (child) | +15 MB bridge + 5-80 MB | +5-80 MB | +| System prompt tokens (total) | ~17,000 | ~600 (if sidecar discloses lazily) | ~600 | +| Lifecycle complexity | Medium | High (2 procs, IPC, version skew) | Medium | +| Crash blast radius | Bad server kills loop | Bridge crash = all gone | Bad server isolated | + +The 1-2 MB sidecar saving is dominated by per-server child RAM (identical across architectures) and by token cost (identical *as long as progressive disclosure is used*). Memory does not justify the sidecar. + +--- + +## 8. CLI Surface + +``` +openab-agent mcp list — show configured servers + status +openab-agent mcp status [server] — health, last error, OAuth state +openab-agent mcp add — append a stdio server to config +openab-agent mcp add --url — append an http server +openab-agent mcp remove — remove a server from config +openab-agent mcp login — run OAuth flow for a server +openab-agent mcp refresh — force-refresh OAuth token +openab-agent mcp test [json] — invoke a tool from CLI (debug) +openab-agent mcp doctor — diagnose config, network, auth +``` + +Subcommand placement under existing `openab-agent` binary — no new binary. CLI is a thin wrapper over `McpRuntimeManager` to keep the same code path validated by both LLM-driven and human-driven flows. + +--- + +## 9. Rollout Plan + +~6 weeks across three phases: + +1. **Foundation (3w)** — `rmcp` + stdio + meta-tool + minimal CLI, behind `--features mcp` +2. **Network & auth (2w)** — Streamable HTTP transport + OAuth providers + `login`/`refresh` CLI; promote flag default-on +3. **Resilience (1w)** — circuit breaker + `doctor` CLI; remove flag + +Week-by-week task breakdown lives on the tracking issue (filed at PR open). + +--- + +## 10. Open Questions + +1. 
**Should `mcp.json` live in the agent or the broker?** Agent owns its own config today; broker's `[agent].inherit_cloud_mcp_servers` (issue #753) is a separate concern. Proposal: agent reads `mcp.json` directly; broker can layer additional servers via env or a Kubernetes ConfigMap. **Owner**: needs broker-team alignment. +2. **Native-agent feature parity with broker-forward path.** PRs #329/#330/#345/#903 attempted broker-side MCP forwarding to backing CLIs. With native MCP in openab-agent, do we deprecate that path, keep it for non-native CLIs, or unify? Proposal: native agent uses its own MCP runtime; broker continues to forward MCP server config to non-native backing CLIs, which run their own MCP clients (Cursor, Copilot). **Owner**: broker-team. + +Resolved at design time (recorded on the tracking issue, not open): tool-naming prefix (`_` single-underscore, matching Hermes §3.1 / opencode §3.2 convention), `session/load` re-enumeration (process-local state, re-read), per-tool permission gates (post-Phase-3 opt-in flag), `resources`/`prompts` capabilities (v2). + +--- + +## 11. 
References + +### Internal + +- `docs/adr/openab-agent.md` — agent charter, design principles cited in §4.2 +- `docs/adr/pr-contribution-guidelines.md` — prior-art requirements followed in §3 +- `openab-agent/src/skills.rs` (PR #955) — extension-pattern precedent cited in §4.3 +- `openab-agent/src/auth.rs` — TokenStore reused in §6.1 +- PRs #329, #330, #345, #903 — closed broker-forward attempts, §1.3 +- Issue #753 — broker-side MCP opt-out (out of scope) +- PR #951 — SessionPool persisted-mapping fix (informs §10 resolved-at-design-time list) + +### External — projects + +- Hermes Agent: https://github.com/NousResearch/hermes-agent +- opencode: https://github.com/anomalyco/opencode (formerly https://github.com/sst/opencode) +- pi-mcp-adapter: https://github.com/nicobailon/pi-mcp-adapter +- Goose: https://github.com/aaif-goose/goose (formerly https://github.com/block/goose) +- OpenHands: https://github.com/OpenHands/OpenHands +- rmcp: https://github.com/modelcontextprotocol/rust-sdk +- OpenClaw (evaluated per `pr-contribution-guidelines.md`, scope not applicable — see §3; canonical repo URL not publicly resolvable, internal reference via avasdream blog cited in guidelines) + +### External — specs & research + +- MCP spec: https://modelcontextprotocol.io +- MCP spec changelog 2025-11-25 (Streamable HTTP supersedes HTTP+SSE): https://modelcontextprotocol.io/specification/2025-11-25/basic/transports +- MCP SEP-1576 — Mitigating Token Bloat in MCP: https://github.com/modelcontextprotocol/modelcontextprotocol/issues/1576 +- Atlassian Rovo MCP SSE→Streamable HTTP migration notice (sunset 2026-06-30): https://community.atlassian.com/forums/Rovo-articles/Migrating-from-Atlassian-s-MCP-Server-SSE-to-Streamable-HTTP/ba-p/3092878 +- Figma MCP server (Streamable HTTP): https://help.figma.com/hc/en-us/articles/32132100833559-Guide-to-the-Dev-Mode-MCP-Server +- Anthropic — Equipping agents for the real world with Agent Skills: 
https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills +- Anthropic — Code execution with MCP: https://www.anthropic.com/engineering/code-execution-with-mcp +- Simon Willison — Claude Skills (2025-10-16): https://simonwillison.net/2025/Oct/16/claude-skills/ +- StackOne — MCP Token Optimization: https://www.stackone.com/blog/mcp-token-optimization/ +- opencode issues cited in §3.2, §4.1, §7: #11868, #7261, #13041 From fdd8738c80905440ea5a6c4601aaaa12074e90a7 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 16:38:41 +0000 Subject: [PATCH 02/98] docs(adr): unify MCP OAuth flows, add device-code preference and Fargate guardrails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - §6.4 flow selection: device-code preferred (matches existing CLI convention), paste-back universal fallback, browser laptop opt-in - §5.2 add login/complete_login meta-tool actions; login returns flow-tagged union - §6.1 spell out TokenStore persistence assumption + cold-start refresh - §6.3 device_authorization_endpoint extension point + RFC 8414 discovery - §3.8 stdio container-image caveat (interpreter required for Node/Python) - §5.7 max_concurrent_servers knob (default 10, see §7 for constrained tuning) - §7 Fargate 512MB/1GB OOM analysis + mitigations Co-Authored-By: Claude Opus 4.7 --- docs/adr/openab-agent-mcp.md | 64 ++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md index bd078f604..f0d8848c2 100644 --- a/docs/adr/openab-agent-mcp.md +++ b/docs/adr/openab-agent-mcp.md @@ -208,6 +208,8 @@ Anthropic reference + community packages. 
All ship as `command + args`; no netwo | `mcp-server-time` | Rust | `mcp-server-time` (cargo) | | `mcp-server-gitlab` | Node | `@modelcontextprotocol/server-gitlab` (npm) | +**Container-image caveat for headless deployments**: Node/Python stdio servers require the corresponding interpreter (`node`, `python3`, `uvx`, `npx`) in the image. The openab base image ships none. Operators running openab-agent in headless environments (Fargate, Kubernetes pods, CI) must either bake the interpreter into a derived image or limit `mcpServers` to Go/Rust binaries (column above). A misconfigured server fails in isolation per §5.9. + #### Vendor-hosted SaaS servers — all Streamable HTTP Survey of mainstream public endpoints (2026-Q2). Every active vendor endpoint surveyed is Streamable HTTP. The Atlassian SSE URL is the lone holdout and has a published sunset date. @@ -302,11 +304,13 @@ openab-agent/src/tools.rs::tool_definitions() returns 5 entries: "action": { "type": "string", "enum": ["help", "list_servers", "list_tools", - "describe_tool", "call", "status"] + "describe_tool", "call", "status", + "login", "complete_login"] }, - "server": { "type": "string" }, - "tool": { "type": "string" }, - "arguments": { "type": "object" } + "server": { "type": "string" }, + "tool": { "type": "string" }, + "arguments": { "type": "object" }, + "redirect_url": { "type": "string" } }, "required": ["action"] } @@ -323,6 +327,8 @@ Per-action contract: | `describe_tool` | `server`, `tool` | `{ name, description, input_schema }` | | `call` | `server`, `tool`, `arguments` | tool's `CallToolResult` | | `status` | `server?` | per-server health / last error / OAuth state | +| `login` | `server` | `{ flow: "device", user_code, verification_url, ... }` or `{ flow: "paste", authorize_url, state, ... 
}` — see §6.4 | +| `complete_login` | `server`, `redirect_url` | `{ ok: true }` or `{ error }` — paste flow only; device flow polls internally | ### 5.3 Agent loop interaction @@ -412,9 +418,10 @@ Single root key `mcpServers` to match Claude Code / Codex / Cursor / Cline conve ┌─────────────────────────────────────┐ │ McpRuntimeManager (1 per agent) │ │ │ - │ config: Arc │ - │ servers: Map │ - │ idle_ttl: Duration (default 10m) │ + │ config: Arc │ + │ servers: Map │ + │ idle_ttl: Duration (10m) │ + │ max_concurrent: usize (10) │ └─────────────────────────────────────┘ │ │ on first call needing server X: @@ -440,7 +447,7 @@ Single root key `mcpServers` to match Claude Code / Codex / Cursor / Cline conve - **Lazy connect**: server is `Disconnected` at boot; transitions to `Connecting → Connected` on first action needing it - **Idle eviction**: background task evicts servers idle > `idle_ttl` (default 10m, configurable per server). State drops to `Disconnected`; tools cache retained for fast re-connect -- **No per-thread isolation**: agent is single-thread-per-session; openab broker handles thread-level concurrency upstream +- **Concurrency cap**: `max_concurrent_servers` bounds simultaneously-`Connected` servers (default 10; see §7 for constrained-env tuning). When at cap, the LRU connected server is force-evicted before connecting a new one - **Connection reuse**: while connected, all `mcp call` actions reuse the same `Peer` ### 5.8 Config refresh model @@ -498,6 +505,10 @@ While `Open`, `mcp call` returns `{"error":"server unavailable, cooldown 45s rem `openab-agent/src/auth.rs` already implements hand-rolled PKCE for Codex (`CODEX_AUTHORIZE_URL`, port 1455). The TokenStore (`~/.openab/agent/auth.json`, 0o600) is reused — `mcp/oauth.rs` calls into the same store with namespaced keys (`mcp:` vs `codex`). +**Persistence assumption**: TokenStore is treated as persistent state. 
Deployments must mount `~/.openab/` on durable storage — hostPath / PVC (k8s work-agents), volume + S3 sync (Fargate Mira), or developer-laptop home directory. Ephemeral container filesystems force a re-bootstrap on every restart and are not a supported configuration. + +**Cold-start refresh**: on process start the runtime reads TokenStore lazily (on first `mcp call` per server). Expired access tokens trigger an in-process refresh via the stored refresh token; success updates the store and proceeds transparently. Refresh failure (revoked / expired refresh token) flips the server's state to `NeedsAuth` (§5.7); the next `mcp call` returns an error that prompts the LLM to re-run the §6.4 login flow. No human interaction is required as long as the refresh token remains valid. + ### 6.2 Built-in providers (Phase 2) | Provider | Auth URL | Token URL | Callback | Scopes | @@ -506,9 +517,40 @@ While `Open`, `mcp call` returns `{"error":"server unavailable, cooldown 45s rem | `github-copilot` | (existing pi/anthropic flow) | existing | existing | existing | | `generic` | from `mcpServers[name].oauth.authorize_url` | from `.oauth.token_url` | dynamically allocated port | from `.oauth.scopes` | +Callback values apply when the browser flow is engaged (`--browser` / `$DISPLAY` set), and when the agent-guided paste-back branch of §6.4 is selected (user copies the redirect URL from the browser URL bar). The device-code branch of §6.4 ignores the callback entirely. + ### 6.3 Custom provider extension point -Config can declare `oauth: { authorize_url, token_url, client_id, scopes }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. +Config can declare `oauth: { authorize_url, token_url, client_id, scopes, device_authorization_endpoint? }` for any server. The generic provider handles PKCE + callback + token persistence. 
No code change needed for new MCP servers that use standard OAuth 2.1. If `device_authorization_endpoint` is set (or RFC 8414 `/.well-known/oauth-authorization-server` advertises one), §6.4 device-code flow is preferred over paste-back. + +### 6.4 Agent-guided OAuth flow (default) + +openab-agent's primary deployment surface is containerized (k8s pods, Fargate tasks) where `localhost:53692/callback` is unreachable and there is no display to open. Two non-browser flows are supported; the runtime picks per server based on capability. Browser-callback remains a laptop-only opt-in (`$DISPLAY` set, or `--browser` passed to `openab-agent mcp login`). + +**Selection logic** (on `mcp(action: "login", server: X)`): + +1. If `X` declares an `oauth.device_authorization_endpoint` in config (§6.3) — or if RFC 8414 discovery against the server's authorize URL advertises one — runtime uses **device-code flow** (RFC 8628). Matches openab's existing CLI convention (`claude auth login`, `codex --device-auth`, `grok --device-auth`). +2. Else runtime uses **paste-back flow** (standard auth-code + PKCE). Universal fallback for OAuth 2.1 servers without a device endpoint (Linear, Notion, Figma, Sentry, ...). + +**Device-code flow** (typically platform OAuth: Anthropic, OpenAI, xAI): + +- `login` returns `{ flow: "device", user_code, verification_url, expires_in }`. Agent relays to chat: "Open `https://example.com/device`, enter code: `ABCD-EFGH`". +- Runtime polls the token endpoint in background (5s interval, RFC 8628 §3.5). On success, persists tokens under `mcp:X`, transitions server to `Connected`. +- LLM checks `mcp(action: "status", server: X)` to learn when ready; `complete_login` not required for this branch. + +**Paste-back flow** (typically MCP SaaS: Linear, Notion, Figma, ...): + +- `login` returns `{ flow: "paste", authorize_url, state }`. Runtime persists transient `{verifier, state}` in TokenStore. 
Agent relays to chat: "Open this link, sign in, paste the URL you land on back here". +- User pastes the URL as next chat message; LLM calls `mcp(action: "complete_login", server: X, redirect_url: "...")`. +- Runtime parses `code` + `state`, validates `state`, performs PKCE token exchange against `token_url`, persists tokens under `mcp:X`, drops transient state. + +**Security** (both flows): + +- Device-code `user_code` is short-lived (RFC 8628 §3.2, typically ≤10 min); an attacker who sees the code in chat must also race the polling loop and prove device ownership. +- Paste-back redirect URL carries only the authorization code (OAuth 2.1 PKCE; implicit/hybrid removed); code is single-use + ≤10 min; the PKCE verifier never leaves the agent (it is kept only in the agent's transient login state and is never sent over the chat channel), so an intercepted code is unusable. +- Token exchange happens entirely inside the agent process; the chat channel never carries access or refresh tokens. Refresh rotation runs in-process per §6.1. + +`openab-agent/src/auth.rs` already ships all three paths for Codex OAuth (browser L150-244, paste-back L165-201, device L328-440). This ADR generalizes that pattern across MCP servers and centralizes flow selection on per-server capability rather than per-CLI hard-coding. OpenHands notes the same headless-OAuth incompatibility (§3.5) without shipping a fix. --- @@ -528,6 +570,8 @@ Included because the sidecar alternative (§4.1 B) was motivated by memory. The 1-2 MB sidecar saving is dominated by per-server child RAM (identical across architectures) and by token cost (identical *as long as progressive disclosure is used*). Memory does not justify the sidecar. +**Constrained-environment note (Fargate / small Kubernetes pods).** Fargate Spot tasks at 512 MB / 1 GB have no swap; OOMKill is hard. Worst-case stack — agent baseline 40 MB + 5 Node/Python stdio servers at 80 MB each + LLM context buffers — sums to ~440-540 MB, which trips a 512 MB task before any prompt processing. 
Two mitigations: (a) lower `max_concurrent_servers` to 3 in `mcp.json` (§5.7), bounding worst case to ~280 MB; (b) prefer Go/Rust stdio servers (5-20 MB) or HTTP servers (0 MB local) over Node/Python interpreters. The `mcp doctor` CLI (§8) flags configurations whose worst-case sum exceeds the cgroup limit. + --- ## 8. CLI Surface @@ -538,7 +582,7 @@ openab-agent mcp status [server] — health, last error, OAuth state openab-agent mcp add <server> <command> — append a stdio server to config openab-agent mcp add <server> --url <url> — append an http server openab-agent mcp remove <server> — remove a server from config -openab-agent mcp login <server> — run OAuth flow for a server +openab-agent mcp login <server> [--browser] — run OAuth flow (see §6.4; --browser opts into localhost callback) openab-agent mcp refresh <server> — force-refresh OAuth token openab-agent mcp test <server> <tool> [json] — invoke a tool from CLI (debug) openab-agent mcp doctor — diagnose config, network, auth From 47363a27e6e2574f918f90540f9b35b2a8e98532 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 16:57:35 +0000 Subject: [PATCH 03/98] =?UTF-8?q?docs(adr):=20harden=20MCP=20OAuth=20?= =?UTF-8?q?=E2=80=94=20opt-in=20discovery=20+=20force-flush=20write=20cont?= =?UTF-8?q?ract?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - §6.4 RFC 8414 discovery disabled by default; opt-in requires oauth.discovery=true + oauth.discovery_allowlist (boot rejects otherwise). Rationale: awsvpc egress + SSRF surface in multi-tenant deployments. - §6.1 add RTR race warning: async persistence layers (Fargate S3 sync, eventually-consistent volumes) must flush new tokens to durable storage before Spot interruption, else cascade-revoke locks the user out. Contract: fsync(2) agent-side + mtime-event-driven sync deployment-side. - §6.3 expose oauth.discovery / oauth.discovery_allowlist on custom providers. 
Co-Authored-By: Claude Opus 4.7 --- docs/adr/openab-agent-mcp.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md index f0d8848c2..fc4c895b3 100644 --- a/docs/adr/openab-agent-mcp.md +++ b/docs/adr/openab-agent-mcp.md @@ -509,6 +509,12 @@ While `Open`, `mcp call` returns `{"error":"server unavailable, cooldown 45s rem **Cold-start refresh**: on process start the runtime reads TokenStore lazily (on first `mcp call` per server). Expired access tokens trigger an in-process refresh via the stored refresh token; success updates the store and proceeds transparently. Refresh failure (revoked / expired refresh token) flips the server's state to `NeedsAuth` (§5.7); the next `mcp call` returns an error that prompts the LLM to re-run the §6.4 login flow. No human interaction is required as long as the refresh token remains valid. +**Refresh-token rotation race with async persistence layers**: OAuth 2.1 servers issue a new refresh token on every rotation and immediately revoke the previous one; reuse of a revoked refresh token is treated as a replay attack and cascade-revokes the entire token chain. Deployments where TokenStore persistence is asynchronous (Fargate S3 sidecar sync, eventually-consistent volumes) must flush new tokens to durable storage *before* the agent can be killed — otherwise a Spot interruption between local write and remote sync restores the revoked token from S3 on the next task and locks the user out. Contract: + +- **Agent side**: `TokenStore` calls `fsync(2)` after every write to `auth.json` +- **Deployment side**: the S3 / volume sync layer must trigger on `auth.json` mtime change (`inotify` / `fsnotify` event), not poll on a cron. 
Cron-driven sync (≥1 min interval) is incompatible with refresh-token rotation under Spot interruption +- **Reference deployment**: Mira (openab-ecs Fargate Spot) `mira-home/` S3 sync configuration + ### 6.2 Built-in providers (Phase 2) | Provider | Auth URL | Token URL | Callback | Scopes | @@ -521,7 +527,7 @@ Callback values apply when the browser flow is engaged (`--browser` / `$DISPLAY` ### 6.3 Custom provider extension point -Config can declare `oauth: { authorize_url, token_url, client_id, scopes, device_authorization_endpoint? }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. If `device_authorization_endpoint` is set (or RFC 8414 `/.well-known/oauth-authorization-server` advertises one), §6.4 device-code flow is preferred over paste-back. +Config can declare `oauth: { authorize_url, token_url, client_id, scopes, device_authorization_endpoint?, discovery?, discovery_allowlist? }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. If `device_authorization_endpoint` is set, §6.4 device-code flow is preferred over paste-back. RFC 8414 dynamic discovery is opt-in only and requires an allowlist — see §6.4. ### 6.4 Agent-guided OAuth flow (default) @@ -529,9 +535,11 @@ openab-agent's primary deployment surface is containerized (k8s pods, Fargate ta **Selection logic** (on `mcp(action: "login", server: X)`): -1. If `X` declares an `oauth.device_authorization_endpoint` in config (§6.3) — or if RFC 8414 discovery against the server's authorize URL advertises one — runtime uses **device-code flow** (RFC 8628). Matches openab's existing CLI convention (`claude auth login`, `codex --device-auth`, `grok --device-auth`). +1. If `X` declares an `oauth.device_authorization_endpoint` in config (§6.3), runtime uses **device-code flow** (RFC 8628). 
Matches openab's existing CLI convention (`claude auth login`, `codex --device-auth`, `grok --device-auth`). 2. Else runtime uses **paste-back flow** (standard auth-code + PKCE). Universal fallback for OAuth 2.1 servers without a device endpoint (Linear, Notion, Figma, Sentry, ...). +RFC 8414 dynamic discovery (`/.well-known/oauth-authorization-server`) is **disabled by default**. Operators opt in per-server via `oauth.discovery: true` plus an explicit `oauth.discovery_allowlist` of permitted domains (e.g. `["*.anthropic.com"]`); boot rejects `discovery: true` without an allowlist. Rationale: awsvpc egress restrictions + SSRF surface in multi-tenant deployments. + **Device-code flow** (typically platform OAuth: Anthropic, OpenAI, xAI): - `login` returns `{ flow: "device", user_code, verification_url, expires_in }`. Agent relays to chat: "Open `https://example.com/device`, enter code: `ABCD-EFGH`". From 32176fc1c3cce9ae5e4c999789b343da79bfe217 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 17:35:38 +0000 Subject: [PATCH 04/98] feat(openab-agent/mcp): scaffold MCP module + mcpServers config loader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 foundation slice per ADR §5.4 + §5.6: - Add optional `rmcp 1.7` dep + `mcp` feature flag (default off) - Wire `#[cfg(feature = "mcp")] mod mcp;` into main - New `mcp/config.rs`: `McpConfig` / `ServerConfig` (Stdio | Http) / `ToolFilter` / `OAuthConfig`, global+project layered load, project precedence on name collision, `${env:VAR}` interpolation with per-server error context OAuth fields limited to `provider` + `scopes`; custom-provider endpoints (§6.3) deferred to Phase 2 auth slice to avoid dead schema. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/Cargo.toml | 10 ++ openab-agent/src/main.rs | 2 + openab-agent/src/mcp/config.rs | 261 +++++++++++++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 3 + 4 files changed, 276 insertions(+) create mode 100644 openab-agent/src/mcp/config.rs create mode 100644 openab-agent/src/mcp/mod.rs diff --git a/openab-agent/Cargo.toml b/openab-agent/Cargo.toml index f059cfc6a..72edda354 100644 --- a/openab-agent/Cargo.toml +++ b/openab-agent/Cargo.toml @@ -21,9 +21,19 @@ getrandom = "0.4.2" urlencoding = "2.1.3" open = "5.3.5" url = "2.5.8" +rmcp = { version = "1.7", default-features = false, optional = true, features = [ + "client", + "transport-child-process", + "transport-streamable-http-client-reqwest", + "auth", +] } [target.'cfg(unix)'.dependencies] libc = "0.2" +[features] +default = [] +mcp = ["dep:rmcp"] + [dev-dependencies] tempfile = "3" diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index a37693079..f5c47f2e8 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -2,6 +2,8 @@ mod acp; mod agent; mod auth; mod llm; +#[cfg(feature = "mcp")] +mod mcp; mod skills; mod tools; diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs new file mode 100644 index 000000000..cba5f8fd1 --- /dev/null +++ b/openab-agent/src/mcp/config.rs @@ -0,0 +1,261 @@ +//! `mcpServers` config schema + loader. See ADR §5.6. +//! +//! Loaded from `.openab/agent/mcp.json` (project) and `~/.openab/agent/mcp.json` +//! (global), project entries take precedence on name collision. 
+ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result, anyhow}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +pub struct McpConfig { + #[serde(rename = "mcpServers", default)] + pub servers: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ServerConfig { + Stdio { + command: String, + #[serde(default)] + args: Vec, + #[serde(default)] + env: HashMap, + #[serde(default, rename = "tool_filter")] + tool_filter: Option, + }, + Http { + url: String, + #[serde(default)] + oauth: Option, + #[serde(default, rename = "tool_filter")] + tool_filter: Option, + }, +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +pub struct ToolFilter { + #[serde(default)] + pub include: Vec, + #[serde(default)] + pub exclude: Vec, +} + +/// OAuth block. Phase 1 only parses `provider` + `scopes`; custom-provider +/// fields (§6.3: `authorize_url`, `token_url`, `device_authorization_endpoint`, +/// `discovery`, `discovery_allowlist`) land with the Phase 2 auth slice. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OAuthConfig { + #[serde(default)] + pub provider: Option, + #[serde(default)] + pub scopes: Vec, +} + +impl McpConfig { + /// Load + merge global and project configs from the standard locations. + /// Missing files are treated as empty. + pub fn load() -> Result { + let global = home_dir().map(|h| h.join(".openab/agent/mcp.json")); + let project = std::env::current_dir() + .ok() + .map(|c| c.join(".openab/agent/mcp.json")); + Self::load_layered(global.as_deref(), project.as_deref()) + } + + /// Load + merge two layers; project wins on name collision. 
+ pub fn load_layered(global: Option<&Path>, project: Option<&Path>) -> Result { + let mut merged = Self::default(); + for path in [global, project].into_iter().flatten() { + if !path.exists() { + continue; + } + let layer = Self::load_file(path)?; + merged.servers.extend(layer.servers); + } + Ok(merged) + } + + fn load_file(path: &Path) -> Result { + let raw = std::fs::read_to_string(path) + .with_context(|| format!("read mcp config {}", path.display()))?; + serde_json::from_str(&raw) + .with_context(|| format!("parse mcp config {}", path.display())) + } +} + +impl ServerConfig { + /// Return a copy with `${env:VAR}` placeholders resolved against the + /// process environment. Missing env vars are an error for that server; + /// callers should skip the server and continue (ADR §5.6 "per-server + /// failure isolated"). `name` is the server name used in error context. + pub fn resolved(&self, name: &str) -> Result { + let json = serde_json::to_value(self)?; + let resolved = interpolate_value(json, &std::env::vars().collect()) + .with_context(|| format!("resolve env for mcp server {name:?}"))?; + Ok(serde_json::from_value(resolved)?) + } +} + +fn interpolate_value( + value: serde_json::Value, + env: &HashMap, +) -> Result { + use serde_json::Value; + match value { + Value::String(s) => Ok(Value::String(interpolate_env(&s, env)?)), + Value::Array(items) => items + .into_iter() + .map(|v| interpolate_value(v, env)) + .collect::>>() + .map(Value::Array), + Value::Object(map) => map + .into_iter() + .map(|(k, v)| interpolate_value(v, env).map(|v| (k, v))) + .collect::>>() + .map(Value::Object), + other => Ok(other), + } +} + +/// Replace `${env:VAR}` tokens in `input` with the matching env value. +/// Missing variables produce an error naming the offender. 
+pub fn interpolate_env(input: &str, env: &HashMap) -> Result { + let mut out = String::with_capacity(input.len()); + let mut rest = input; + while let Some(start) = rest.find("${env:") { + out.push_str(&rest[..start]); + let after = &rest[start + "${env:".len()..]; + let end = after + .find('}') + .ok_or_else(|| anyhow!("unterminated ${{env:..}} in {input:?}"))?; + let var = &after[..end]; + let val = env + .get(var) + .ok_or_else(|| anyhow!("env var ${var} not set (referenced by mcp config)"))?; + out.push_str(val); + rest = &after[end + 1..]; + } + out.push_str(rest); + Ok(out) +} + +fn home_dir() -> Option { + std::env::var_os("HOME").map(PathBuf::from) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn env(pairs: &[(&str, &str)]) -> HashMap { + pairs.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect() + } + + #[test] + fn interpolate_replaces_tokens() { + let e = env(&[("FOO", "bar"), ("X", "y")]); + assert_eq!(interpolate_env("a${env:FOO}b${env:X}", &e).unwrap(), "abarby"); + } + + #[test] + fn interpolate_passes_through_plain_strings() { + let e = env(&[]); + assert_eq!(interpolate_env("plain", &e).unwrap(), "plain"); + } + + #[test] + fn interpolate_errors_on_missing_var() { + let e = env(&[]); + let err = interpolate_env("${env:MISSING}", &e).unwrap_err().to_string(); + assert!(err.contains("MISSING"), "expected MISSING in error: {err}"); + } + + #[test] + fn interpolate_errors_on_unterminated() { + let e = env(&[("FOO", "bar")]); + assert!(interpolate_env("${env:FOO", &e).is_err()); + } + + #[test] + fn parses_stdio_and_http_servers() { + let json = r#"{ + "mcpServers": { + "fs": { + "type": "stdio", + "command": "mcp-server-filesystem", + "args": ["/workspace"], + "tool_filter": { "include": ["read_*"] } + }, + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + assert_eq!(cfg.servers.len(), 2); + match 
cfg.servers.get("fs").unwrap() { + ServerConfig::Stdio { command, args, tool_filter, .. } => { + assert_eq!(command, "mcp-server-filesystem"); + assert_eq!(args, &vec!["/workspace".to_string()]); + assert_eq!(tool_filter.as_ref().unwrap().include, vec!["read_*"]); + } + _ => panic!("expected stdio"), + } + match cfg.servers.get("linear").unwrap() { + ServerConfig::Http { url, oauth, .. } => { + assert_eq!(url, "https://mcp.linear.app/mcp"); + assert_eq!(oauth.as_ref().unwrap().provider.as_deref(), Some("linear")); + } + _ => panic!("expected http"), + } + } + + #[test] + fn resolved_substitutes_env_in_args() { + // SAFETY: single-threaded test; isolated env key. + unsafe { std::env::set_var("MCP_TEST_TOKEN", "secret123"); } + let cfg = ServerConfig::Stdio { + command: "github-mcp-server".into(), + args: vec!["--token".into(), "${env:MCP_TEST_TOKEN}".into()], + env: HashMap::new(), + tool_filter: None, + }; + match cfg.resolved("github").unwrap() { + ServerConfig::Stdio { args, .. } => { + assert_eq!(args[1], "secret123"); + } + _ => unreachable!(), + } + } + + #[test] + fn merge_project_wins() { + let dir = tempfile::tempdir().unwrap(); + let global = dir.path().join("global.json"); + let project = dir.path().join("project.json"); + std::fs::write( + &global, + r#"{"mcpServers":{"fs":{"type":"stdio","command":"global-fs"},"x":{"type":"stdio","command":"global-x"}}}"#, + ).unwrap(); + std::fs::write( + &project, + r#"{"mcpServers":{"fs":{"type":"stdio","command":"project-fs"}}}"#, + ).unwrap(); + let cfg = McpConfig::load_layered(Some(&global), Some(&project)).unwrap(); + assert_eq!(cfg.servers.len(), 2); + match cfg.servers.get("fs").unwrap() { + ServerConfig::Stdio { command, .. } => assert_eq!(command, "project-fs"), + _ => unreachable!(), + } + match cfg.servers.get("x").unwrap() { + ServerConfig::Stdio { command, .. 
} => assert_eq!(command, "global-x"), + _ => unreachable!(), + } + } +} diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs new file mode 100644 index 000000000..5d487979d --- /dev/null +++ b/openab-agent/src/mcp/mod.rs @@ -0,0 +1,3 @@ +//! Native MCP client. See `docs/adr/openab-agent-mcp.md`. + +pub mod config; From 3d588654276de440d63c64e40f85bb0d601328e4 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 17:42:18 +0000 Subject: [PATCH 05/98] style(openab-agent/mcp): apply cargo fmt to config.rs CI fmt-check found 8 formatting deltas in the Phase 1 scaffold. No logic change. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 36 ++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index cba5f8fd1..892699afa 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -6,7 +6,7 @@ use std::collections::HashMap; use std::path::{Path, PathBuf}; -use anyhow::{Context, Result, anyhow}; +use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; #[derive(Debug, Default, Clone, Serialize, Deserialize)] @@ -82,8 +82,7 @@ impl McpConfig { fn load_file(path: &Path) -> Result { let raw = std::fs::read_to_string(path) .with_context(|| format!("read mcp config {}", path.display()))?; - serde_json::from_str(&raw) - .with_context(|| format!("parse mcp config {}", path.display())) + serde_json::from_str(&raw).with_context(|| format!("parse mcp config {}", path.display())) } } @@ -152,13 +151,19 @@ mod tests { use super::*; fn env(pairs: &[(&str, &str)]) -> HashMap { - pairs.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect() + pairs + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect() } #[test] fn interpolate_replaces_tokens() { let e = env(&[("FOO", "bar"), ("X", "y")]); - assert_eq!(interpolate_env("a${env:FOO}b${env:X}", &e).unwrap(), "abarby"); + 
assert_eq!( + interpolate_env("a${env:FOO}b${env:X}", &e).unwrap(), + "abarby" + ); } #[test] @@ -170,7 +175,9 @@ mod tests { #[test] fn interpolate_errors_on_missing_var() { let e = env(&[]); - let err = interpolate_env("${env:MISSING}", &e).unwrap_err().to_string(); + let err = interpolate_env("${env:MISSING}", &e) + .unwrap_err() + .to_string(); assert!(err.contains("MISSING"), "expected MISSING in error: {err}"); } @@ -200,7 +207,12 @@ mod tests { let cfg: McpConfig = serde_json::from_str(json).unwrap(); assert_eq!(cfg.servers.len(), 2); match cfg.servers.get("fs").unwrap() { - ServerConfig::Stdio { command, args, tool_filter, .. } => { + ServerConfig::Stdio { + command, + args, + tool_filter, + .. + } => { assert_eq!(command, "mcp-server-filesystem"); assert_eq!(args, &vec!["/workspace".to_string()]); assert_eq!(tool_filter.as_ref().unwrap().include, vec!["read_*"]); @@ -219,7 +231,9 @@ mod tests { #[test] fn resolved_substitutes_env_in_args() { // SAFETY: single-threaded test; isolated env key. 
- unsafe { std::env::set_var("MCP_TEST_TOKEN", "secret123"); } + unsafe { + std::env::set_var("MCP_TEST_TOKEN", "secret123"); + } let cfg = ServerConfig::Stdio { command: "github-mcp-server".into(), args: vec!["--token".into(), "${env:MCP_TEST_TOKEN}".into()], @@ -242,11 +256,13 @@ mod tests { std::fs::write( &global, r#"{"mcpServers":{"fs":{"type":"stdio","command":"global-fs"},"x":{"type":"stdio","command":"global-x"}}}"#, - ).unwrap(); + ) + .unwrap(); std::fs::write( &project, r#"{"mcpServers":{"fs":{"type":"stdio","command":"project-fs"}}}"#, - ).unwrap(); + ) + .unwrap(); let cfg = McpConfig::load_layered(Some(&global), Some(&project)).unwrap(); assert_eq!(cfg.servers.len(), 2); match cfg.servers.get("fs").unwrap() { From b70915937e7b9ff032290879e7529da1abfd79ef Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 17:52:54 +0000 Subject: [PATCH 06/98] ci(openab-agent): exercise --features mcp in clippy + test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI previously only built the default feature set, leaving the Phase 1 MCP scaffold (mcp/config.rs, gated by --features mcp) without compile, clippy, or test coverage. Adds explicit --features mcp invocations and watches workflow file changes so this gap is closed for the rest of the rollout (ADR §9). 
Co-Authored-By: Claude Opus 4.7 --- .github/workflows/ci-openab-agent.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci-openab-agent.yml b/.github/workflows/ci-openab-agent.yml index c0d5a3727..b33d8b613 100644 --- a/.github/workflows/ci-openab-agent.yml +++ b/.github/workflows/ci-openab-agent.yml @@ -4,9 +4,11 @@ on: push: paths: - 'openab-agent/**' + - '.github/workflows/ci-openab-agent.yml' pull_request: paths: - 'openab-agent/**' + - '.github/workflows/ci-openab-agent.yml' jobs: check: @@ -24,7 +26,9 @@ jobs: workspaces: openab-agent - run: cargo fmt --check - run: cargo clippy -- -D warnings + - run: cargo clippy --features mcp -- -D warnings - run: cargo test + - run: cargo test --features mcp - run: cargo test -- --ignored env: ANTHROPIC_API_KEY: "fake-key-for-ci" From 3067fecdcca5ff8a0167fac1c34f0da59245f0a1 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:03:12 +0000 Subject: [PATCH 07/98] feat(openab-agent/mcp): add 'mcp list' CLI subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires up the Phase 1 config loader so clippy --features mcp can see it. McpConfig::load() + ServerConfig::resolved() + serde pretty-print are all reachable from main, clearing the dead-code denial of compile under -D warnings. Output groups successful servers (✓) and failures (✗ with reason), sorted by name for deterministic display. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/main.rs | 17 +++++++++++++++++ openab-agent/src/mcp/mod.rs | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index f5c47f2e8..9486b0a6f 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -24,6 +24,19 @@ enum Commands { #[command(subcommand)] provider: AuthProvider, }, + /// Inspect / manage configured MCP servers + #[cfg(feature = "mcp")] + Mcp { + #[command(subcommand)] + action: McpAction, + }, +} + +#[cfg(feature = "mcp")] +#[derive(Subcommand)] +enum McpAction { + /// List configured MCP servers (loads global + project mcp.json) + List, } #[derive(Subcommand)] @@ -72,5 +85,9 @@ async fn main() { auth::show_status(); } }, + #[cfg(feature = "mcp")] + Some(Commands::Mcp { action }) => match action { + McpAction::List => mcp::cli_list_servers(), + }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 5d487979d..4499a5492 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,3 +1,37 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. pub mod config; + +use config::McpConfig; + +/// `openab-agent mcp list` — load global + project config, resolve env, print. 
+pub fn cli_list_servers() { + let cfg = match McpConfig::load() { + Ok(c) => c, + Err(e) => { + eprintln!("failed to load mcp config: {e:#}"); + std::process::exit(1); + } + }; + if cfg.servers.is_empty() { + println!("No MCP servers configured."); + println!(" global: ~/.openab/agent/mcp.json"); + println!(" project: ./.openab/agent/mcp.json"); + return; + } + let mut servers: Vec<_> = cfg.servers.iter().collect(); + servers.sort_by(|(a, _), (b, _)| a.cmp(b)); + for (name, server) in servers { + match server.resolved(name) { + Ok(resolved) => { + println!("✓ {name}"); + if let Ok(j) = serde_json::to_string_pretty(&resolved) { + for line in j.lines() { + println!(" {line}"); + } + } + } + Err(e) => println!("✗ {name}: {e:#}"), + } + } +} From 82c43e55b8dc34c98251035f085aaccf82fa0550 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:10:37 +0000 Subject: [PATCH 08/98] fix(openab-agent/mcp): satisfy clippy::unnecessary_sort_by sort_by_key over (name, _) is the cleaner form; no behavior change. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 4499a5492..139f574b1 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -20,7 +20,7 @@ pub fn cli_list_servers() { return; } let mut servers: Vec<_> = cfg.servers.iter().collect(); - servers.sort_by(|(a, _), (b, _)| a.cmp(b)); + servers.sort_by_key(|(name, _)| *name); for (name, server) in servers { match server.resolved(name) { Ok(resolved) => { From cceb42ed03afa6624e1dfa257d6e9faacaeee8ac Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:21:14 +0000 Subject: [PATCH 09/98] feat(openab-agent/mcp): redact secrets in 'mcp list' by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Mira's security review on the Phase 1 thread: the previous 'mcp list' eagerly called ServerConfig::resolved(), substituting \${env:GITHUB_TOKEN} etc. into the printed output. Three leak paths matter — pasting CLI output into bug reports / chat, screen sharing, and stdout log collection. New behavior: - Default: print raw config; \${env:VAR} placeholders kept verbatim. Safe to paste publicly; reader still sees which env var feeds each field. - --resolve opts into substitution and prints a two-line warning banner. Useful for diagnosing missing-env startup failures. No CLI-shape break: 'mcp list' still works; --resolve is additive. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/main.rs | 10 +++++++-- openab-agent/src/mcp/mod.rs | 42 +++++++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index 9486b0a6f..dfa01ac77 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -36,7 +36,13 @@ enum Commands { #[derive(Subcommand)] enum McpAction { /// List configured MCP servers (loads global + project mcp.json) - List, + List { + /// Substitute ${env:VAR} placeholders with real values. + /// WARNING: output will contain secrets if your config references + /// tokens via env vars — do not paste publicly. + #[arg(long)] + resolve: bool, + }, } #[derive(Subcommand)] @@ -87,7 +93,7 @@ async fn main() { }, #[cfg(feature = "mcp")] Some(Commands::Mcp { action }) => match action { - McpAction::List => mcp::cli_list_servers(), + McpAction::List { resolve } => mcp::cli_list_servers(resolve), }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 139f574b1..23245f60d 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -2,10 +2,15 @@ pub mod config; -use config::McpConfig; +use config::{McpConfig, ServerConfig}; -/// `openab-agent mcp list` — load global + project config, resolve env, print. -pub fn cli_list_servers() { +/// `openab-agent mcp list [--resolve]`. +/// +/// Default: print configs verbatim (`${env:VAR}` placeholders kept as-is) so +/// `mcp list` is safe to paste into bug reports. `--resolve` opts into +/// substituting env vars and prints a leading warning — useful for debugging +/// missing-env startup failures locally. 
+pub fn cli_list_servers(resolve: bool) { let cfg = match McpConfig::load() { Ok(c) => c, Err(e) => { @@ -19,19 +24,34 @@ pub fn cli_list_servers() { println!(" project: ./.openab/agent/mcp.json"); return; } + if resolve { + println!("⚠ --resolve: env vars substituted into output below."); + println!("⚠ Output may contain secrets — do not paste publicly."); + println!(); + } let mut servers: Vec<_> = cfg.servers.iter().collect(); servers.sort_by_key(|(name, _)| *name); for (name, server) in servers { + print_server(name, server, resolve); + } +} + +fn print_server(name: &str, server: &ServerConfig, resolve: bool) { + if resolve { match server.resolved(name) { - Ok(resolved) => { - println!("✓ {name}"); - if let Ok(j) = serde_json::to_string_pretty(&resolved) { - for line in j.lines() { - println!(" {line}"); - } - } - } + Ok(r) => print_json("✓", name, &r), Err(e) => println!("✗ {name}: {e:#}"), } + } else { + print_json("•", name, server); + } +} + +fn print_json(status: &str, name: &str, value: &T) { + println!("{status} {name}"); + if let Ok(json) = serde_json::to_string_pretty(value) { + for line in json.lines() { + println!(" {line}"); + } } } From 20a2448ad70d67c589f0798ad87cb5ae4c786556 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:30:20 +0000 Subject: [PATCH 10/98] fix(openab-agent/mcp): emit --resolve warnings on stderr println! puts the warning banner into stdout, which (a) gets swallowed by 'mcp list --resolve > dump.json' redirection so the user never sees the security notice, and (b) corrupts the JSON payload for downstream pipes like 'mcp list --resolve | jq'. Routing the banner through eprintln! keeps it visible regardless of redirection and keeps stdout pure JSON for piping. Standard Unix convention: data on stdout, diagnostics on stderr. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 23245f60d..f8bae4385 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -25,9 +25,9 @@ pub fn cli_list_servers(resolve: bool) { return; } if resolve { - println!("⚠ --resolve: env vars substituted into output below."); - println!("⚠ Output may contain secrets — do not paste publicly."); - println!(); + eprintln!("⚠ --resolve: env vars substituted into output below."); + eprintln!("⚠ Output may contain secrets — do not paste publicly."); + eprintln!(); } let mut servers: Vec<_> = cfg.servers.iter().collect(); servers.sort_by_key(|(name, _)| *name); From 7079c9063c0e524dcff5988a6f958959b0075527 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:47:32 +0000 Subject: [PATCH 11/98] feat(openab-agent/mcp): add runtime state-machine scaffold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces McpRuntimeManager owning one ServerHandle per configured server, each starting in ServerStatus::Disconnected per ADR §5.7 (lazy connect). Wires the manager via `mcp status` CLI so the types are exercised by clippy --features mcp; actual rmcp TokioChildProcess dial + Connected / Failed transitions land in the next slice to keep that risky bit isolated for bisecting. 
--- openab-agent/src/main.rs | 3 + openab-agent/src/mcp/mod.rs | 33 ++++++-- openab-agent/src/mcp/runtime.rs | 130 ++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+), 7 deletions(-) create mode 100644 openab-agent/src/mcp/runtime.rs diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index dfa01ac77..066e92f37 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -43,6 +43,8 @@ enum McpAction { #[arg(long)] resolve: bool, }, + /// Show per-server runtime status + Status, } #[derive(Subcommand)] @@ -94,6 +96,7 @@ async fn main() { #[cfg(feature = "mcp")] Some(Commands::Mcp { action }) => match action { McpAction::List { resolve } => mcp::cli_list_servers(resolve), + McpAction::Status => mcp::cli_show_status(), }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index f8bae4385..7345e608e 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,8 +1,17 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. pub mod config; +pub mod runtime; use config::{McpConfig, ServerConfig}; +use runtime::McpRuntimeManager; + +fn load_config_or_exit() -> McpConfig { + McpConfig::load().unwrap_or_else(|e| { + eprintln!("failed to load mcp config: {e:#}"); + std::process::exit(1); + }) +} /// `openab-agent mcp list [--resolve]`. /// @@ -11,13 +20,7 @@ use config::{McpConfig, ServerConfig}; /// substituting env vars and prints a leading warning — useful for debugging /// missing-env startup failures locally. pub fn cli_list_servers(resolve: bool) { - let cfg = match McpConfig::load() { - Ok(c) => c, - Err(e) => { - eprintln!("failed to load mcp config: {e:#}"); - std::process::exit(1); - } - }; + let cfg = load_config_or_exit(); if cfg.servers.is_empty() { println!("No MCP servers configured."); println!(" global: ~/.openab/agent/mcp.json"); @@ -55,3 +58,19 @@ fn print_json(status: &str, name: &str, value: &T) { } } } + +/// `openab-agent mcp status`. 
+/// +/// Prints per-server runtime status. Phase 1 always reports `Disconnected` +/// because servers are not yet dialed; the next slice wires `connect()` and +/// real state transitions land then. +pub fn cli_show_status() { + let manager = McpRuntimeManager::from_config(load_config_or_exit()); + if manager.is_empty() { + println!("No MCP servers configured."); + return; + } + for (name, status) in manager.statuses() { + println!("{} {name}", status.icon()); + } +} diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs new file mode 100644 index 000000000..1fd26446b --- /dev/null +++ b/openab-agent/src/mcp/runtime.rs @@ -0,0 +1,130 @@ +//! Per-server lifecycle manager. See ADR §5.4 + §5.7. +//! +//! This slice lands only the state-machine scaffold (statuses, handle map, +//! lazy-connect entry point). The actual rmcp `TokioChildProcess` dial + +//! client storage lands in the next slice — keeping that risky bit out of +//! the same commit so any breakage is easy to bisect. + +use std::collections::HashMap; + +use super::config::{McpConfig, ServerConfig}; + +/// Per-server status. ADR §5.7: lazy connect — handles start `Disconnected` +/// and transition to `Connecting` only on first use. Connecting / Connected / +/// Failed are wired up by `connect()` in the next slice. +#[allow(dead_code)] +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ServerStatus { + Disconnected, + Connecting, + Connected, + Failed(String), +} + +impl ServerStatus { + pub fn icon(&self) -> &'static str { + match self { + ServerStatus::Disconnected => "○", + ServerStatus::Connecting => "◐", + ServerStatus::Connected => "●", + ServerStatus::Failed(_) => "✗", + } + } +} + +#[allow(dead_code)] // name + config consumed by connect() in the next slice +#[derive(Debug)] +pub struct ServerHandle { + pub name: String, + pub config: ServerConfig, + pub status: ServerStatus, +} + +/// Owns one `ServerHandle` per configured server. 
Created once at process +/// start (or session start, per ADR §5.8 refresh model). +#[derive(Debug, Default)] +pub struct McpRuntimeManager { + handles: HashMap, +} + +impl McpRuntimeManager { + pub fn from_config(cfg: McpConfig) -> Self { + let handles = cfg + .servers + .into_iter() + .map(|(name, config)| { + let handle = ServerHandle { + name: name.clone(), + config, + status: ServerStatus::Disconnected, + }; + (name, handle) + }) + .collect(); + Self { handles } + } + + pub fn statuses(&self) -> Vec<(&str, &ServerStatus)> { + let mut out: Vec<_> = self + .handles + .iter() + .map(|(name, h)| (name.as_str(), &h.status)) + .collect(); + out.sort_by_key(|(name, _)| *name); + out + } + + pub fn len(&self) -> usize { + self.handles.len() + } + + pub fn is_empty(&self) -> bool { + self.handles.is_empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn from_config_initializes_each_server_disconnected() { + let json = r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + assert_eq!(mgr.len(), 2); + let statuses = mgr.statuses(); + assert_eq!(statuses.len(), 2); + for (_, status) in statuses { + assert_eq!(*status, ServerStatus::Disconnected); + } + } + + #[test] + fn empty_config_yields_empty_manager() { + let mgr = McpRuntimeManager::from_config(McpConfig::default()); + assert!(mgr.is_empty()); + assert_eq!(mgr.len(), 0); + assert!(mgr.statuses().is_empty()); + } + + #[test] + fn statuses_sorted_by_name() { + let json = r#"{ + "mcpServers": { + "zed": { "type": "stdio", "command": "z" }, + "alpha": { "type": "stdio", "command": "a" }, + "mid": { "type": "stdio", "command": "m" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let names: Vec<&str> = 
mgr.statuses().into_iter().map(|(n, _)| n).collect(); + assert_eq!(names, vec!["alpha", "mid", "zed"]); + } +} From 40b849b4dcdc9aeeae9442feb12049e6d47fa2bf Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:50:53 +0000 Subject: [PATCH 12/98] fix(openab-agent/mcp): drop unused len() from McpRuntimeManager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `clippy --features mcp -- -D warnings` doesn't compile the test target, so `len()`'s only callers (the unit tests) didn't keep it alive. Tests already used `statuses().len()` in one place — switch the other two to match and drop the now-dead method. `is_empty()` stays because `cli_show_status` calls it. --- openab-agent/src/mcp/runtime.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 1fd26446b..bdb036088 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -74,10 +74,6 @@ impl McpRuntimeManager { out } - pub fn len(&self) -> usize { - self.handles.len() - } - pub fn is_empty(&self) -> bool { self.handles.is_empty() } @@ -97,7 +93,6 @@ mod tests { }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - assert_eq!(mgr.len(), 2); let statuses = mgr.statuses(); assert_eq!(statuses.len(), 2); for (_, status) in statuses { @@ -109,7 +104,6 @@ mod tests { fn empty_config_yields_empty_manager() { let mgr = McpRuntimeManager::from_config(McpConfig::default()); assert!(mgr.is_empty()); - assert_eq!(mgr.len(), 0); assert!(mgr.statuses().is_empty()); } From 3ab8decfeb46db7f73c90c91d095dbf40dff9c37 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:02:54 +0000 Subject: [PATCH 13/98] feat(openab-agent/mcp): wrap handles in Arc, stub async connect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADR §5.7 lifecycle needs a `connect()` method that 
spawns a child process and awaits an rmcp handshake — both `Send` across `.await`. Plain `HashMap` is not `Sync`, and the background idle- eviction task will share the map with the foreground `mcp call` path, so the read-heavy / write-light access pattern wants `tokio::sync::RwLock`. This slice lands the lock migration only: - `handles: Arc>>` - `statuses()` and `is_empty()` become async; `cli_show_status` follows - `McpRuntimeManager` is now `Clone` (Arc bump) so the eviction task can hold its own handle - `connect(name)` transitions to `Connecting` and returns; the actual `rmcp::TokioChildProcess` dial + `Connected` / `Failed` transitions land in the next slice — keeping that bit isolated for bisecting 6 unit tests cover snapshot ordering, unknown-server error, transition, and clone-shares-state. --- openab-agent/src/main.rs | 2 +- openab-agent/src/mcp/mod.rs | 6 +- openab-agent/src/mcp/runtime.rs | 134 +++++++++++++++++++++++--------- 3 files changed, 103 insertions(+), 39 deletions(-) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index 066e92f37..f96bdd0ed 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -96,7 +96,7 @@ async fn main() { #[cfg(feature = "mcp")] Some(Commands::Mcp { action }) => match action { McpAction::List { resolve } => mcp::cli_list_servers(resolve), - McpAction::Status => mcp::cli_show_status(), + McpAction::Status => mcp::cli_show_status().await, }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 7345e608e..1b6c5a27e 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -64,13 +64,13 @@ fn print_json(status: &str, name: &str, value: &T) { /// Prints per-server runtime status. Phase 1 always reports `Disconnected` /// because servers are not yet dialed; the next slice wires `connect()` and /// real state transitions land then. 
-pub fn cli_show_status() { +pub async fn cli_show_status() { let manager = McpRuntimeManager::from_config(load_config_or_exit()); - if manager.is_empty() { + if manager.is_empty().await { println!("No MCP servers configured."); return; } - for (name, status) in manager.statuses() { + for (name, status) in manager.statuses().await { println!("{} {name}", status.icon()); } } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index bdb036088..1296ecc4a 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -1,18 +1,24 @@ //! Per-server lifecycle manager. See ADR §5.4 + §5.7. //! -//! This slice lands only the state-machine scaffold (statuses, handle map, -//! lazy-connect entry point). The actual rmcp `TokioChildProcess` dial + -//! client storage lands in the next slice — keeping that risky bit out of -//! the same commit so any breakage is easy to bisect. +//! Handles live behind `Arc>` so `connect()` (async, +//! spawns child processes) is `Send` across `.await` and a background idle- +//! eviction task can share the map with foreground `mcp call` invocations +//! (ADR §5.7). Read-heavy / write-light fits `RwLock`. +//! +//! This slice lands the lock migration and a `connect()` that transitions to +//! `Connecting`; the actual rmcp `TokioChildProcess` dial + transition to +//! `Connected` / `Failed` lands in the next slice — keeping that risky bit +//! isolated for bisecting. use std::collections::HashMap; +use std::sync::Arc; + +use anyhow::{anyhow, Result}; +use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -/// Per-server status. ADR §5.7: lazy connect — handles start `Disconnected` -/// and transition to `Connecting` only on first use. Connecting / Connected / -/// Failed are wired up by `connect()` in the next slice. 
-#[allow(dead_code)] +#[allow(dead_code)] // Connected / Failed land with the rmcp dial in the next slice #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { Disconnected, @@ -32,7 +38,7 @@ impl ServerStatus { } } -#[allow(dead_code)] // name + config consumed by connect() in the next slice +#[allow(dead_code)] // name + config consumed by the rmcp dial in the next slice #[derive(Debug)] pub struct ServerHandle { pub name: String, @@ -40,16 +46,16 @@ pub struct ServerHandle { pub status: ServerStatus, } -/// Owns one `ServerHandle` per configured server. Created once at process -/// start (or session start, per ADR §5.8 refresh model). -#[derive(Debug, Default)] +/// Owns one `ServerHandle` per configured server, behind an async `RwLock` +/// so the foreground LLM path and the background eviction task can share it. +#[derive(Debug, Default, Clone)] pub struct McpRuntimeManager { - handles: HashMap, + handles: Arc>>, } impl McpRuntimeManager { pub fn from_config(cfg: McpConfig) -> Self { - let handles = cfg + let handles: HashMap<_, _> = cfg .servers .into_iter() .map(|(name, config)| { @@ -61,21 +67,39 @@ impl McpRuntimeManager { (name, handle) }) .collect(); - Self { handles } + Self { + handles: Arc::new(RwLock::new(handles)), + } } - pub fn statuses(&self) -> Vec<(&str, &ServerStatus)> { - let mut out: Vec<_> = self - .handles - .iter() - .map(|(name, h)| (name.as_str(), &h.status)) - .collect(); - out.sort_by_key(|(name, _)| *name); + /// Snapshot of `(name, status)` sorted by name. Clones out so the read + /// guard is dropped before returning — callers don't hold a lock. 
+ pub async fn statuses(&self) -> Vec<(String, ServerStatus)> { + let mut out: Vec<_> = { + let guard = self.handles.read().await; + guard + .iter() + .map(|(name, h)| (name.clone(), h.status.clone())) + .collect() + }; + out.sort_by(|(a, _), (b, _)| a.cmp(b)); out } - pub fn is_empty(&self) -> bool { - self.handles.is_empty() + pub async fn is_empty(&self) -> bool { + self.handles.read().await.is_empty() + } + + /// Transition the named server to `Connecting`. The rmcp + /// `TokioChildProcess` dial + transition to `Connected` / `Failed` + /// lands in the next slice — see module doc. + pub async fn connect(&self, name: &str) -> Result<()> { + let mut guard = self.handles.write().await; + let handle = guard + .get_mut(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + handle.status = ServerStatus::Connecting; + Ok(()) } } @@ -83,8 +107,8 @@ impl McpRuntimeManager { mod tests { use super::*; - #[test] - fn from_config_initializes_each_server_disconnected() { + #[tokio::test] + async fn from_config_initializes_each_server_disconnected() { let json = r#"{ "mcpServers": { "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, @@ -93,22 +117,22 @@ mod tests { }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - let statuses = mgr.statuses(); + let statuses = mgr.statuses().await; assert_eq!(statuses.len(), 2); for (_, status) in statuses { - assert_eq!(*status, ServerStatus::Disconnected); + assert_eq!(status, ServerStatus::Disconnected); } } - #[test] - fn empty_config_yields_empty_manager() { + #[tokio::test] + async fn empty_config_yields_empty_manager() { let mgr = McpRuntimeManager::from_config(McpConfig::default()); - assert!(mgr.is_empty()); - assert!(mgr.statuses().is_empty()); + assert!(mgr.is_empty().await); + assert!(mgr.statuses().await.is_empty()); } - #[test] - fn statuses_sorted_by_name() { + #[tokio::test] + async fn statuses_sorted_by_name() { let json = r#"{ 
"mcpServers": { "zed": { "type": "stdio", "command": "z" }, @@ -118,7 +142,47 @@ mod tests { }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - let names: Vec<&str> = mgr.statuses().into_iter().map(|(n, _)| n).collect(); + let names: Vec = mgr + .statuses() + .await + .into_iter() + .map(|(n, _)| n) + .collect(); assert_eq!(names, vec!["alpha", "mid", "zed"]); } + + #[tokio::test] + async fn connect_unknown_server_errors() { + let mgr = McpRuntimeManager::from_config(McpConfig::default()); + let err = mgr.connect("missing").await.unwrap_err().to_string(); + assert!(err.contains("missing"), "expected 'missing' in {err}"); + } + + #[tokio::test] + async fn connect_transitions_to_connecting() { + let json = r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "true" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + mgr.connect("fs").await.unwrap(); + let statuses = mgr.statuses().await; + assert_eq!(statuses[0].1, ServerStatus::Connecting); + } + + #[tokio::test] + async fn manager_clone_shares_state() { + let json = r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "true" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let a = McpRuntimeManager::from_config(cfg); + let b = a.clone(); + a.connect("fs").await.unwrap(); + assert_eq!(b.statuses().await[0].1, ServerStatus::Connecting); + } } From dc1b0a6d6d30cffe6df62a01af98f67cff3faeba Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:10:40 +0000 Subject: [PATCH 14/98] fix(openab-agent/mcp): satisfy cargo fmt --check on runtime test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chained `.statuses().await.into_iter().map(|(n, _)| n).collect()` fits on one line under rustfmt's default 100-char width — broke it preemptively last tick on the wrong side of the threshold. 
--- openab-agent/src/mcp/runtime.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 1296ecc4a..16c759e72 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -142,12 +142,7 @@ mod tests { }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - let names: Vec = mgr - .statuses() - .await - .into_iter() - .map(|(n, _)| n) - .collect(); + let names: Vec = mgr.statuses().await.into_iter().map(|(n, _)| n).collect(); assert_eq!(names, vec!["alpha", "mid", "zed"]); } From be9ee62ed43ced14f709b8405e5e761f2cef100b Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:20:58 +0000 Subject: [PATCH 15/98] fix(openab-agent/mcp): allow(dead_code) on connect() stub `clippy --features mcp -D warnings` doesn't compile the test target, so `connect()`'s only callers (the unit tests) don't keep it alive. The real rmcp dial lands in the next slice and will be reachable from the meta-tool dispatch / `mcp call` CLI path; until then a targeted allow keeps clippy quiet without papering over the type-level dead_code on ServerStatus variants (which is already explicitly allowed). --- openab-agent/src/mcp/runtime.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 16c759e72..f54bff0ed 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -93,6 +93,7 @@ impl McpRuntimeManager { /// Transition the named server to `Connecting`. The rmcp /// `TokioChildProcess` dial + transition to `Connected` / `Failed` /// lands in the next slice — see module doc. 
+ #[allow(dead_code)] // wired into meta-tool dispatch in the next slice; tests keep it covered pub async fn connect(&self, name: &str) -> Result<()> { let mut guard = self.handles.write().await; let handle = guard From 82d5da4bd45b6051f93a6c23ea3fac320a832ec5 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:40:22 +0000 Subject: [PATCH 16/98] feat(openab-agent/mcp): real stdio dial via rmcp + double-lock connect connect() now spawns the configured stdio child via rmcp's TokioChildProcess, runs the JSON-RPC handshake (().serve(transport)), and parks the resulting RunningService on ServerHandle.client. Lock discipline uses a double-acquire: a brief write lock to mark Connecting, drop, run the dial without any lock held, then a second brief write lock to install the client or record Failed(msg). Holding the write lock across the spawn+handshake would block mcp status and the future idle-eviction scan for the entire dial latency. HTTP transport is rejected with a "phase 2" error so misconfigured entries surface cleanly without advancing state past Disconnected. Adds `openab-agent mcp connect ` as a smoke-test CLI for mcp.json entries; the RunningService is dropped on process exit. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/main.rs | 6 ++ openab-agent/src/mcp/mod.rs | 21 ++++- openab-agent/src/mcp/runtime.rs | 134 +++++++++++++++++++++++++------- 3 files changed, 132 insertions(+), 29 deletions(-) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index f96bdd0ed..7acf6769b 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -45,6 +45,11 @@ enum McpAction { }, /// Show per-server runtime status Status, + /// Spawn the configured server and run the MCP handshake (smoke-test). 
+ Connect { + /// Server name as configured in mcp.json + name: String, + }, } #[derive(Subcommand)] @@ -97,6 +102,7 @@ async fn main() { Some(Commands::Mcp { action }) => match action { McpAction::List { resolve } => mcp::cli_list_servers(resolve), McpAction::Status => mcp::cli_show_status().await, + McpAction::Connect { name } => mcp::cli_connect(name).await, }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 1b6c5a27e..c6b6bd110 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -61,9 +61,9 @@ fn print_json(status: &str, name: &str, value: &T) { /// `openab-agent mcp status`. /// -/// Prints per-server runtime status. Phase 1 always reports `Disconnected` -/// because servers are not yet dialed; the next slice wires `connect()` and -/// real state transitions land then. +/// Prints per-server runtime status. Servers start `Disconnected` and only +/// advance after `mcp connect ` (or, later, lazy dial from the agent +/// path). pub async fn cli_show_status() { let manager = McpRuntimeManager::from_config(load_config_or_exit()); if manager.is_empty().await { @@ -74,3 +74,18 @@ pub async fn cli_show_status() { println!("{} {name}", status.icon()); } } + +/// `openab-agent mcp connect `. Spawns the configured stdio server, +/// runs the rmcp handshake, and reports success or the failure reason. +/// The connection is dropped on process exit — this CLI is a smoke-test +/// for `mcp.json` entries, not a long-running session. +pub async fn cli_connect(name: String) { + let manager = McpRuntimeManager::from_config(load_config_or_exit()); + match manager.connect(&name).await { + Ok(()) => println!("● connected: {name}"), + Err(e) => { + eprintln!("✗ {name}: {e:#}"); + std::process::exit(1); + } + } +} diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index f54bff0ed..a99b3d6ca 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -5,20 +5,26 @@ //! 
eviction task can share the map with foreground `mcp call` invocations //! (ADR §5.7). Read-heavy / write-light fits `RwLock`. //! -//! This slice lands the lock migration and a `connect()` that transitions to -//! `Connecting`; the actual rmcp `TokioChildProcess` dial + transition to -//! `Connected` / `Failed` lands in the next slice — keeping that risky bit -//! isolated for bisecting. +//! `connect()` uses a double-lock pattern: a short write lock to mark +//! `Connecting`, release the lock, run the rmcp handshake without holding +//! any lock, then re-acquire briefly to install the client or record the +//! failure. Holding the write lock across the `serve(...).await` would +//! starve every reader (including `mcp status` and the eviction scan) for +//! the duration of a child-process spawn + handshake. use std::collections::HashMap; use std::sync::Arc; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; +use rmcp::service::{RoleClient, RunningService}; +use rmcp::ServiceExt; +use rmcp::transport::{ConfigureCommandExt, TokioChildProcess}; +use tokio::process::Command; use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -#[allow(dead_code)] // Connected / Failed land with the rmcp dial in the next slice +#[allow(dead_code)] // NeedsAuth lands with the Phase 2 OAuth slice (ADR §5.7) #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { Disconnected, @@ -38,12 +44,22 @@ impl ServerStatus { } } -#[allow(dead_code)] // name + config consumed by the rmcp dial in the next slice -#[derive(Debug)] pub struct ServerHandle { pub name: String, pub config: ServerConfig, pub status: ServerStatus, + pub client: Option>, +} + +impl std::fmt::Debug for ServerHandle { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ServerHandle") + .field("name", &self.name) + .field("config", &self.config) + .field("status", &self.status) + .field("client", &self.client.is_some()) + .finish() + } } /// Owns 
one `ServerHandle` per configured server, behind an async `RwLock` @@ -63,6 +79,7 @@ impl McpRuntimeManager { name: name.clone(), config, status: ServerStatus::Disconnected, + client: None, }; (name, handle) }) @@ -90,17 +107,75 @@ impl McpRuntimeManager { self.handles.read().await.is_empty() } - /// Transition the named server to `Connecting`. The rmcp - /// `TokioChildProcess` dial + transition to `Connected` / `Failed` - /// lands in the next slice — see module doc. - #[allow(dead_code)] // wired into meta-tool dispatch in the next slice; tests keep it covered + /// Lazy-connect the named server (ADR §5.7). Idempotent if already + /// `Connected` with a live client. HTTP transport is Phase 2. pub async fn connect(&self, name: &str) -> Result<()> { + let dial = { + let mut guard = self.handles.write().await; + let handle = guard + .get_mut(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + if matches!(handle.status, ServerStatus::Connected) && handle.client.is_some() { + return Ok(()); + } + let resolved = handle.config.resolved(name)?; + let dial = match resolved { + ServerConfig::Stdio { + command, args, env, .. + } => StdioDial { command, args, env }, + ServerConfig::Http { .. 
} => { + return Err(anyhow!( + "http transport lands in phase 2 (server {name:?})" + )); + } + }; + handle.status = ServerStatus::Connecting; + dial + }; + + let dial_result = dial.run().await; + let mut guard = self.handles.write().await; let handle = guard .get_mut(name) - .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; - handle.status = ServerStatus::Connecting; - Ok(()) + .ok_or_else(|| anyhow!("server {name:?} vanished during connect"))?; + match dial_result { + Ok(client) => { + handle.status = ServerStatus::Connected; + handle.client = Some(client); + Ok(()) + } + Err(e) => { + let msg = format!("{e:#}"); + handle.status = ServerStatus::Failed(msg.clone()); + Err(anyhow!(msg)) + } + } + } +} + +struct StdioDial { + command: String, + args: Vec, + env: HashMap, +} + +impl StdioDial { + async fn run(self) -> Result> { + let Self { + command, + args, + env, + } = self; + let cmd = Command::new(&command).configure(|c| { + c.args(&args); + c.envs(&env); + }); + let transport = TokioChildProcess::new(cmd) + .with_context(|| format!("spawn mcp child process {command:?}"))?; + ().serve(transport) + .await + .with_context(|| format!("mcp handshake with {command:?}")) } } @@ -155,30 +230,37 @@ mod tests { } #[tokio::test] - async fn connect_transitions_to_connecting() { + async fn connect_http_returns_phase2_error() { let json = r#"{ "mcpServers": { - "fs": { "type": "stdio", "command": "true" } + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } } }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - mgr.connect("fs").await.unwrap(); - let statuses = mgr.statuses().await; - assert_eq!(statuses[0].1, ServerStatus::Connecting); + let err = mgr.connect("linear").await.unwrap_err().to_string(); + assert!(err.contains("phase 2"), "expected 'phase 2' in {err}"); + // Status not advanced past Disconnected for unsupported transports. 
+ assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); } #[tokio::test] - async fn manager_clone_shares_state() { + async fn connect_to_missing_binary_records_failed() { let json = r#"{ "mcpServers": { - "fs": { "type": "stdio", "command": "true" } + "broken": { + "type": "stdio", + "command": "/nonexistent/path/openab-mcp-test-stub-zzz" + } } }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let a = McpRuntimeManager::from_config(cfg); - let b = a.clone(); - a.connect("fs").await.unwrap(); - assert_eq!(b.statuses().await[0].1, ServerStatus::Connecting); + let mgr = McpRuntimeManager::from_config(cfg); + let err = mgr.connect("broken").await.unwrap_err().to_string(); + assert!(err.contains("spawn"), "expected 'spawn' in {err}"); + match &mgr.statuses().await[0].1 { + ServerStatus::Failed(msg) => assert!(msg.contains("spawn")), + other => panic!("expected Failed, got {other:?}"), + } } } From 8fcd17c3b485e3250c796fe787eddb0a8dc249a5 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:43:04 +0000 Subject: [PATCH 17/98] fix(openab-agent/mcp): race guard for concurrent connect() on same server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Mira's Tick 13 review: when two callers race on the same name, both pass the first-lock idempotency check (Disconnected), both spawn a child, both come back to acquire the install-lock. The second arrival must yield to the winner, otherwise it overwrites the installed client and silently drops the working RunningService. Adds a 3-line double-check at the second lock acquisition: if status is already Connected with a live client, return Ok(()) — `dial_result` falls out of scope here and RunningService::Drop kills the duplicate child. Cheaper than a Connecting-aware notify/condvar and adequate until the eviction-aware slice lands. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index a99b3d6ca..69c1cdc0a 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -139,6 +139,12 @@ impl McpRuntimeManager { let handle = guard .get_mut(name) .ok_or_else(|| anyhow!("server {name:?} vanished during connect"))?; + // Race guard: a concurrent connect() may have installed a client while + // we were dialing. Yield to the winner — `dial_result` drops here, + // killing the duplicate child via RunningService's Drop impl. + if matches!(handle.status, ServerStatus::Connected) && handle.client.is_some() { + return Ok(()); + } match dial_result { Ok(client) => { handle.status = ServerStatus::Connected; From 3b214ddd7c4d63221ec2d46aef16a7fd837e003e Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:50:59 +0000 Subject: [PATCH 18/98] fix(openab-agent/mcp): satisfy cargo fmt --check on runtime.rs Three rustfmt collapses that I missed under default 100-char width: * import order: bare `rmcp::ServiceExt` sorts AFTER `rmcp::transport::*` (rustfmt puts sub-module paths before bare items within a crate). * `return Err(anyhow!(...))` for the HTTP phase-2 message fits on one line (~88 chars). * `let Self { command, args, env } = self;` fits on one line. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 69c1cdc0a..9fd04adeb 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -17,8 +17,8 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use rmcp::service::{RoleClient, RunningService}; -use rmcp::ServiceExt; use rmcp::transport::{ConfigureCommandExt, TokioChildProcess}; +use rmcp::ServiceExt; use tokio::process::Command; use tokio::sync::RwLock; @@ -124,9 +124,7 @@ impl McpRuntimeManager { command, args, env, .. } => StdioDial { command, args, env }, ServerConfig::Http { .. } => { - return Err(anyhow!( - "http transport lands in phase 2 (server {name:?})" - )); + return Err(anyhow!("http transport lands in phase 2 (server {name:?})")); } }; handle.status = ServerStatus::Connecting; @@ -168,11 +166,7 @@ struct StdioDial { impl StdioDial { async fn run(self) -> Result> { - let Self { - command, - args, - env, - } = self; + let Self { command, args, env } = self; let cmd = Command::new(&command).configure(|c| { c.args(&args); c.envs(&env); From cf6d7cd33ab84b69f2294c79c1e3d8644fe51d37 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:03:09 +0000 Subject: [PATCH 19/98] feat(openab-agent/mcp): meta-tool action enum + help/list_servers dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the single `mcp` tool the LLM sees (ADR §5.2). This slice lands: * `Action` enum tagged on `action`, matching the eight ADR actions, deserialized straight from the LLM's tool-call payload. * `dispatch(manager, action) -> Result` as the single entry point — both `agent.rs::execute_tool` (next slice) and tests go through this. * `help` returns the static usage doc. 
* `list_servers` returns `[{name, status, transport}]` via a new `McpRuntimeManager::snapshot()` that clones out under a read lock. * The four IO-bearing actions (`list_tools`, `describe_tool`, `call`, `status`) return a `not yet implemented` error so the surface area is visible to callers without pre-implementing the peer-borrow path. `login` / `complete_login` land with the Phase 2 OAuth slice. `#[allow(dead_code)]` on `Action` and `dispatch` because clippy --features mcp doesn't compile the test target and agent.rs wire-up lands next slice. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 208 ++++++++++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 1 + openab-agent/src/mcp/runtime.rs | 15 +++ 3 files changed, 224 insertions(+) create mode 100644 openab-agent/src/mcp/meta_tool.rs diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs new file mode 100644 index 000000000..8222c1f96 --- /dev/null +++ b/openab-agent/src/mcp/meta_tool.rs @@ -0,0 +1,208 @@ +//! Single `mcp` meta-tool the LLM sees. See ADR §5.2 + §5.3. +//! +//! Phase 1 scope: action enum + dispatch wiring + the two no-IO actions +//! (`help`, `list_servers`). The IO-bearing actions (`list_tools`, +//! `describe_tool`, `call`, `status`) return a `not yet implemented` +//! error so the contract surface is visible to callers while the +//! `RunningService` borrow path lands in the next slice. The Phase 2 +//! `login` / `complete_login` actions land with the OAuth slice. + +use anyhow::{anyhow, Result}; +use serde::Deserialize; +use serde_json::{json, Value}; + +use super::config::ServerConfig; +use super::runtime::{McpRuntimeManager, ServerStatus}; + +/// Deserialized form of the meta-tool's input JSON (ADR §5.2). The LLM +/// sends `{ "action": "...", ... }`; `tag = "action"` routes by that field. 
+#[allow(dead_code)] // wired into agent.rs execute_tool dispatch in the next slice +#[derive(Debug, Deserialize)] +#[serde(tag = "action", rename_all = "snake_case")] +pub enum Action { + Help, + ListServers, + ListTools { + server: String, + }, + DescribeTool { + server: String, + tool: String, + }, + Call { + server: String, + tool: String, + #[serde(default)] + arguments: Value, + }, + Status { + #[serde(default)] + server: Option, + }, +} + +/// Entry point — the LLM tool dispatcher hands us a deserialized `Action` +/// and we return the JSON payload that becomes the tool result. +#[allow(dead_code)] // wired into agent.rs execute_tool dispatch in the next slice +pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result { + match action { + Action::Help => Ok(json!(HELP)), + Action::ListServers => Ok(list_servers(manager).await), + Action::ListTools { .. } + | Action::DescribeTool { .. } + | Action::Call { .. } + | Action::Status { .. } => Err(anyhow!( + "mcp action not yet implemented in phase 1 (lands with the peer-borrow slice)" + )), + } +} + +const HELP: &str = "\ +The `mcp` tool lets you talk to configured MCP servers. + +Actions: + help show this message + list_servers list configured servers and status + list_tools(server) list tools exposed by a server + describe_tool(server, tool) show input_schema for one tool + call(server, tool, args) invoke a tool + status(server?) per-server health + last error + +Connections are lazy: the first action that needs a server spawns its \ +child process and runs the handshake. 
Idle servers are evicted after \ +the configured TTL."; + +async fn list_servers(manager: &McpRuntimeManager) -> Value { + let snapshot = manager.snapshot().await; + let entries: Vec = snapshot + .into_iter() + .map(|(name, status, config)| { + json!({ + "name": name, + "status": status_label(&status), + "transport": transport_label(&config), + }) + }) + .collect(); + Value::Array(entries) +} + +fn status_label(status: &ServerStatus) -> &'static str { + match status { + ServerStatus::Disconnected => "disconnected", + ServerStatus::Connecting => "connecting", + ServerStatus::Connected => "connected", + ServerStatus::Failed(_) => "failed", + } +} + +fn transport_label(config: &ServerConfig) -> &'static str { + match config { + ServerConfig::Stdio { .. } => "stdio", + ServerConfig::Http { .. } => "http", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::mcp::config::McpConfig; + + fn mgr_from(json: &str) -> McpRuntimeManager { + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + McpRuntimeManager::from_config(cfg) + } + + #[tokio::test] + async fn help_returns_doc_string() { + let mgr = mgr_from(r#"{"mcpServers":{}}"#); + let result = dispatch(&mgr, Action::Help).await.unwrap(); + let s = result.as_str().unwrap(); + assert!(s.contains("list_servers")); + assert!(s.contains("call(server, tool")); + } + + #[tokio::test] + async fn list_servers_reports_name_status_transport() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + } + }"#, + ); + let result = dispatch(&mgr, Action::ListServers).await.unwrap(); + let entries = result.as_array().unwrap(); + assert_eq!(entries.len(), 2); + let by_name: std::collections::HashMap<_, _> = entries + .iter() + .map(|e| (e["name"].as_str().unwrap(), e)) + .collect(); + assert_eq!(by_name["fs"]["transport"], "stdio"); + assert_eq!(by_name["fs"]["status"], "disconnected"); + 
assert_eq!(by_name["linear"]["transport"], "http"); + } + + #[tokio::test] + async fn list_servers_empty_yields_empty_array() { + let mgr = mgr_from(r#"{"mcpServers":{}}"#); + let result = dispatch(&mgr, Action::ListServers).await.unwrap(); + assert!(result.as_array().unwrap().is_empty()); + } + + #[tokio::test] + async fn unimplemented_actions_error_cleanly() { + let mgr = mgr_from(r#"{"mcpServers":{}}"#); + for action in [ + Action::ListTools { + server: "fs".into(), + }, + Action::DescribeTool { + server: "fs".into(), + tool: "read".into(), + }, + Action::Call { + server: "fs".into(), + tool: "read".into(), + arguments: json!({}), + }, + Action::Status { server: None }, + ] { + let err = dispatch(&mgr, action).await.unwrap_err().to_string(); + assert!(err.contains("not yet implemented"), "got: {err}"); + } + } + + #[test] + fn action_deserializes_from_meta_tool_payload() { + let payload = json!({ + "action": "call", + "server": "github", + "tool": "create_issue", + "arguments": { "title": "x" } + }); + let action: Action = serde_json::from_value(payload).unwrap(); + match action { + Action::Call { + server, + tool, + arguments, + } => { + assert_eq!(server, "github"); + assert_eq!(tool, "create_issue"); + assert_eq!(arguments["title"], "x"); + } + other => panic!("expected Call, got {other:?}"), + } + } + + #[test] + fn action_status_server_is_optional() { + let action: Action = serde_json::from_value(json!({ "action": "status" })).unwrap(); + assert!(matches!(action, Action::Status { server: None })); + let action: Action = + serde_json::from_value(json!({ "action": "status", "server": "fs" })).unwrap(); + assert!(matches!(action, Action::Status { server: Some(_) })); + } +} diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index c6b6bd110..b7b7b8f78 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,6 +1,7 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. 
pub mod config; +pub mod meta_tool; pub mod runtime; use config::{McpConfig, ServerConfig}; diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 9fd04adeb..e48207017 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -107,6 +107,21 @@ impl McpRuntimeManager { self.handles.read().await.is_empty() } + /// Snapshot of `(name, status, config)` sorted by name. Used by the + /// `list_servers` meta-tool action which needs the transport variant + /// alongside the runtime status. + pub async fn snapshot(&self) -> Vec<(String, ServerStatus, ServerConfig)> { + let mut out: Vec<_> = { + let guard = self.handles.read().await; + guard + .iter() + .map(|(name, h)| (name.clone(), h.status.clone(), h.config.clone())) + .collect() + }; + out.sort_by(|(a, ..), (b, ..)| a.cmp(b)); + out + } + /// Lazy-connect the named server (ADR §5.7). Idempotent if already /// `Connected` with a live client. HTTP transport is Phase 2. pub async fn connect(&self, name: &str) -> Result<()> { From 27f5ccaf07c40f79e9d7d533c1b75ad4c9a096ae Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:12:24 +0000 Subject: [PATCH 20/98] =?UTF-8?q?refactor(openab-agent/mcp):=20apply=20Mir?= =?UTF-8?q?a=20Tick=2015=20review=20=E2=80=94=20zero-clone=20+=20LLM=20fal?= =?UTF-8?q?lback=20hint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two of Mira's three Tick 15 findings (Arc-wrap for peer borrow lands with the actual list_tools/call slice — Arc without a consumer earns no keep). (1) `not_implemented_msg(action)` names the requested action, lists the actions that DO work (`help`, `list_servers`), and tells the LLM to fall back to native `read` / `write` / `edit` / `bash` rather than retrying. The old generic "not yet implemented" gave the model nothing to act on. (2) `ServerConfig::transport_label() -> &'static str` replaces the config clone inside `snapshot()`. 
The `Stdio` variant carries an `args: Vec` and `env: HashMap`; cloning that just to read the variant tag was wasted heap traffic on every `list_servers` call. `snapshot()` now yields `(String, ServerStatus, &'static str)`. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 13 +++++ openab-agent/src/mcp/meta_tool.rs | 83 +++++++++++++++++++------------ openab-agent/src/mcp/runtime.rs | 10 ++-- 3 files changed, 69 insertions(+), 37 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 892699afa..742459430 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -36,6 +36,19 @@ pub enum ServerConfig { }, } +impl ServerConfig { + /// Static label used by the `mcp` meta-tool's `list_servers` action. + /// Returning `&'static str` lets `snapshot()` avoid cloning the + /// (potentially large) `Stdio { args, env, ... }` payload just to + /// read the transport variant. + pub fn transport_label(&self) -> &'static str { + match self { + ServerConfig::Stdio { .. } => "stdio", + ServerConfig::Http { .. } => "http", + } + } +} + #[derive(Debug, Default, Clone, Serialize, Deserialize)] pub struct ToolFilter { #[serde(default)] diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 8222c1f96..ee49e454c 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -11,7 +11,6 @@ use anyhow::{anyhow, Result}; use serde::Deserialize; use serde_json::{json, Value}; -use super::config::ServerConfig; use super::runtime::{McpRuntimeManager, ServerStatus}; /// Deserialized form of the meta-tool's input JSON (ADR §5.2). The LLM @@ -48,15 +47,30 @@ pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result Ok(json!(HELP)), Action::ListServers => Ok(list_servers(manager).await), - Action::ListTools { .. } - | Action::DescribeTool { .. } - | Action::Call { .. } - | Action::Status { .. 
} => Err(anyhow!( - "mcp action not yet implemented in phase 1 (lands with the peer-borrow slice)" - )), + other => Err(anyhow!("{}", not_implemented_msg(&other))), } } +/// Error body for actions whose handler hasn't landed yet. Mentions the +/// requested action and the supported set so the LLM can recover by +/// falling back to the native `read` / `write` / `edit` / `bash` tools +/// instead of retrying the same action blindly. +fn not_implemented_msg(action: &Action) -> String { + let name = match action { + Action::Help => "help", + Action::ListServers => "list_servers", + Action::ListTools { .. } => "list_tools", + Action::DescribeTool { .. } => "describe_tool", + Action::Call { .. } => "call", + Action::Status { .. } => "status", + }; + format!( + "mcp action '{name}' is not yet implemented (phase 1 scaffold). \ + Currently supported: 'help', 'list_servers'. To complete your task \ + right now, fall back to the native agent tools (read, write, edit, bash)." + ) +} + const HELP: &str = "\ The `mcp` tool lets you talk to configured MCP servers. @@ -76,11 +90,11 @@ async fn list_servers(manager: &McpRuntimeManager) -> Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot .into_iter() - .map(|(name, status, config)| { + .map(|(name, status, transport)| { json!({ "name": name, "status": status_label(&status), - "transport": transport_label(&config), + "transport": transport, }) }) .collect(); @@ -96,13 +110,6 @@ fn status_label(status: &ServerStatus) -> &'static str { } } -fn transport_label(config: &ServerConfig) -> &'static str { - match config { - ServerConfig::Stdio { .. } => "stdio", - ServerConfig::Http { .. 
} => "http", - } -} - #[cfg(test)] mod tests { use super::*; @@ -152,25 +159,37 @@ mod tests { } #[tokio::test] - async fn unimplemented_actions_error_cleanly() { + async fn unimplemented_actions_name_themselves_and_guide_fallback() { let mgr = mgr_from(r#"{"mcpServers":{}}"#); - for action in [ - Action::ListTools { - server: "fs".into(), - }, - Action::DescribeTool { - server: "fs".into(), - tool: "read".into(), - }, - Action::Call { - server: "fs".into(), - tool: "read".into(), - arguments: json!({}), - }, - Action::Status { server: None }, - ] { + let cases = [ + ( + Action::ListTools { + server: "fs".into(), + }, + "list_tools", + ), + ( + Action::DescribeTool { + server: "fs".into(), + tool: "read".into(), + }, + "describe_tool", + ), + ( + Action::Call { + server: "fs".into(), + tool: "read".into(), + arguments: json!({}), + }, + "call", + ), + (Action::Status { server: None }, "status"), + ]; + for (action, expected_name) in cases { let err = dispatch(&mgr, action).await.unwrap_err().to_string(); + assert!(err.contains(expected_name), "missing action name: {err}"); assert!(err.contains("not yet implemented"), "got: {err}"); + assert!(err.contains("read, write, edit, bash"), "missing fallback: {err}"); } } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index e48207017..a4828e899 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -107,15 +107,15 @@ impl McpRuntimeManager { self.handles.read().await.is_empty() } - /// Snapshot of `(name, status, config)` sorted by name. Used by the - /// `list_servers` meta-tool action which needs the transport variant - /// alongside the runtime status. - pub async fn snapshot(&self) -> Vec<(String, ServerStatus, ServerConfig)> { + /// Snapshot of `(name, status, transport_label)` sorted by name. Used + /// by the `list_servers` meta-tool action; the static transport label + /// avoids cloning the `Stdio { args, env, .. }` payload. 
+ pub async fn snapshot(&self) -> Vec<(String, ServerStatus, &'static str)> { let mut out: Vec<_> = { let guard = self.handles.read().await; guard .iter() - .map(|(name, h)| (name.clone(), h.status.clone(), h.config.clone())) + .map(|(name, h)| (name.clone(), h.status.clone(), h.config.transport_label())) .collect() }; out.sort_by(|(a, ..), (b, ..)| a.cmp(b)); From 8b91c1c089659946d68c4b4af820fd18e4332b61 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:20:38 +0000 Subject: [PATCH 21/98] fix(openab-agent/mcp): split wide assert!() to satisfy cargo fmt --check rustfmt's fn_call_width default (60) splits assert!() args when the inline args exceed it; the new "read, write, edit, bash" fallback assert tipped the third call from ~55 to ~62 chars. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index ee49e454c..548b99691 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -189,7 +189,10 @@ mod tests { let err = dispatch(&mgr, action).await.unwrap_err().to_string(); assert!(err.contains(expected_name), "missing action name: {err}"); assert!(err.contains("not yet implemented"), "got: {err}"); - assert!(err.contains("read, write, edit, bash"), "missing fallback: {err}"); + assert!( + err.contains("read, write, edit, bash"), + "missing fallback: {err}" + ); } } From 13d5ea7b03ae51bf8858e328111b1dbd592ae527 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:34:02 +0000 Subject: [PATCH 22/98] feat(openab-agent/mcp): list_tools action + Arc-cloned peer borrow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply Mira's Tick 15 review (3) and ship the first IO-bearing meta-tool action. * `ServerHandle.client` is now `Option>>`. `connect()` wraps the dial result in `Arc::new`. 
* `McpRuntimeManager::arc_peer(name)` clones the Arc out under a short read lock, drops the guard, returns the handle. Callers `.await` on `peer.list_all_tools()` / `peer.call_tool()` with no runtime lock held — kills the writer-starvation risk Mira flagged and sidesteps `Future is not Send` from holding a guard across `.await`. * `Action::ListTools { server }` now wired: lazy `connect()` → `arc_peer()` → `peer.list_all_tools()` → `[{name, description}]`. `describe_tool` / `call` / `status` still stub. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 62 ++++++++++++++++++++++++++----- openab-agent/src/mcp/runtime.rs | 28 +++++++++++++- 2 files changed, 79 insertions(+), 11 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 548b99691..413cd73bf 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -7,7 +7,7 @@ //! `RunningService` borrow path lands in the next slice. The Phase 2 //! `login` / `complete_login` actions land with the OAuth slice. -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; use serde::Deserialize; use serde_json::{json, Value}; @@ -47,6 +47,7 @@ pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result Ok(json!(HELP)), Action::ListServers => Ok(list_servers(manager).await), + Action::ListTools { server } => list_tools(manager, &server).await, other => Err(anyhow!("{}", not_implemented_msg(&other))), } } @@ -66,8 +67,9 @@ fn not_implemented_msg(action: &Action) -> String { }; format!( "mcp action '{name}' is not yet implemented (phase 1 scaffold). \ - Currently supported: 'help', 'list_servers'. To complete your task \ - right now, fall back to the native agent tools (read, write, edit, bash)." + Currently supported: 'help', 'list_servers', 'list_tools'. To complete \ + your task right now, fall back to the native agent tools (read, write, \ + edit, bash)." 
) } @@ -86,6 +88,30 @@ Connections are lazy: the first action that needs a server spawns its \ child process and runs the handshake. Idle servers are evicted after \ the configured TTL."; +async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result { + // Lazy connect per ADR §5.3 — idempotent if already Connected. + manager + .connect(server) + .await + .with_context(|| format!("connect mcp server {server:?}"))?; + let peer = manager.arc_peer(server).await?; + // Arc lets the I/O `.await` run with no runtime lock held. + let tools = peer + .list_all_tools() + .await + .with_context(|| format!("list_all_tools on {server:?}"))?; + let entries: Vec = tools + .into_iter() + .map(|t| { + json!({ + "name": t.name, + "description": t.description, + }) + }) + .collect(); + Ok(Value::Array(entries)) +} + async fn list_servers(manager: &McpRuntimeManager) -> Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot @@ -158,16 +184,34 @@ mod tests { assert!(result.as_array().unwrap().is_empty()); } + #[tokio::test] + async fn list_tools_propagates_connect_failure() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/path/openab-mcp-test-stub-zzz" + } + } + }"#, + ); + let err = dispatch( + &mgr, + Action::ListTools { + server: "broken".into(), + }, + ) + .await + .unwrap_err() + .to_string(); + assert!(err.contains("connect mcp server"), "got: {err}"); + } + #[tokio::test] async fn unimplemented_actions_name_themselves_and_guide_fallback() { let mgr = mgr_from(r#"{"mcpServers":{}}"#); let cases = [ - ( - Action::ListTools { - server: "fs".into(), - }, - "list_tools", - ), ( Action::DescribeTool { server: "fs".into(), diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index a4828e899..5fa41c8d3 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -48,7 +48,11 @@ pub struct ServerHandle { pub name: String, pub config: 
ServerConfig, pub status: ServerStatus, - pub client: Option>, + /// `Arc` so foreground callers can clone a peer handle out under a + /// short read lock, drop the guard, and then run `peer.list_all_tools()` + /// / `peer.call_tool()` without holding any runtime lock across the + /// I/O `.await` (avoids writer starvation + `Future is not Send` traps). + pub client: Option>>, } impl std::fmt::Debug for ServerHandle { @@ -107,6 +111,26 @@ impl McpRuntimeManager { self.handles.read().await.is_empty() } + /// Clone the live MCP client handle for `name` out from under a short + /// read lock. The caller `.await`s on the returned `Arc` with no + /// runtime lock held, so background writers (idle eviction, new + /// `connect`s) are not starved by long-running tool calls. + /// + /// Errors if the server isn't configured or isn't currently + /// `Connected`. Callers that want lazy-connect should run + /// `connect(name)` first. + pub async fn arc_peer(&self, name: &str) -> Result>> { + let guard = self.handles.read().await; + let handle = guard + .get(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + handle + .client + .as_ref() + .cloned() + .ok_or_else(|| anyhow!("mcp server {name:?} is not connected")) + } + /// Snapshot of `(name, status, transport_label)` sorted by name. Used /// by the `list_servers` meta-tool action; the static transport label /// avoids cloning the `Stdio { args, env, .. }` payload. 
@@ -161,7 +185,7 @@ impl McpRuntimeManager { match dial_result { Ok(client) => { handle.status = ServerStatus::Connected; - handle.client = Some(client); + handle.client = Some(Arc::new(client)); Ok(()) } Err(e) => { From 2c7f1c62457e51ec212030078570e634ae8d5ccb Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:43:54 +0000 Subject: [PATCH 23/98] feat(openab-agent/mcp): call action with lenient argument coercion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply Mira's Tick 18 review (3) — lenient args parsing for the `call` action so LLMs that send `arguments: null` (or omit the field entirely) for no-arg tools don't bounce off a strict type check. * `Action::Call { server, tool, arguments }` is wired through `call_tool()`: validate arguments → lazy `connect()` → `arc_peer()` → `peer.call_tool(CallToolRequestParams::new(...).with_arguments(map))` → serialize `CallToolResult` to JSON. * Argument coercion: `Value::Object(map) → map`, `Value::Null → {}`, everything else (string, number, array, bool) is rejected with a message that names the actual type so the LLM can correct itself. * Tests cover both branches: non-object string args fail early at validation; null args pass validation and fail later at connect. * `describe_tool` and `status` still stub; supported-list message updated to include `call`. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 104 ++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 11 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 413cd73bf..850f2be27 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -48,6 +48,11 @@ pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result Ok(json!(HELP)), Action::ListServers => Ok(list_servers(manager).await), Action::ListTools { server } => list_tools(manager, &server).await, + Action::Call { + server, + tool, + arguments, + } => call_tool(manager, &server, &tool, arguments).await, other => Err(anyhow!("{}", not_implemented_msg(&other))), } } @@ -67,9 +72,9 @@ fn not_implemented_msg(action: &Action) -> String { }; format!( "mcp action '{name}' is not yet implemented (phase 1 scaffold). \ - Currently supported: 'help', 'list_servers', 'list_tools'. To complete \ - your task right now, fall back to the native agent tools (read, write, \ - edit, bash)." + Currently supported: 'help', 'list_servers', 'list_tools', 'call'. \ + To complete your task right now, fall back to the native agent tools \ + (read, write, edit, bash)." ) } @@ -88,6 +93,39 @@ Connections are lazy: the first action that needs a server spawns its \ child process and runs the handshake. Idle servers are evicted after \ the configured TTL."; +async fn call_tool( + manager: &McpRuntimeManager, + server: &str, + tool: &str, + arguments: Value, +) -> Result { + // Lenient arg coercion per Mira's Tick 18 review: LLMs often send + // `null` or omit `arguments` for no-arg tools; rejecting those would + // make zero-arg calls fragile. Only real type errors (string, number, + // array, bool) are refused. 
+ let args_map = match arguments { + Value::Object(map) => map, + Value::Null => serde_json::Map::new(), + other => { + return Err(anyhow!( + "mcp call arguments must be a JSON object (or null/omitted for no-arg tools), got {other}" + )); + } + }; + manager + .connect(server) + .await + .with_context(|| format!("connect mcp server {server:?}"))?; + let peer = manager.arc_peer(server).await?; + let params = rmcp::model::CallToolRequestParams::new(tool.to_string()) + .with_arguments(args_map); + let result = peer + .call_tool(params) + .await + .with_context(|| format!("call_tool {tool:?} on {server:?}"))?; + serde_json::to_value(&result).context("serialize CallToolResult") +} + async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result { // Lazy connect per ADR §5.3 — idempotent if already Connected. manager @@ -184,6 +222,58 @@ mod tests { assert!(result.as_array().unwrap().is_empty()); } + #[tokio::test] + async fn call_rejects_non_object_arguments() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "true" } + } + }"#, + ); + let err = dispatch( + &mgr, + Action::Call { + server: "fs".into(), + tool: "read".into(), + arguments: json!("oops, a string"), + }, + ) + .await + .unwrap_err() + .to_string(); + assert!(err.contains("must be a JSON object"), "got: {err}"); + } + + #[tokio::test] + async fn call_null_arguments_passes_validation_and_reaches_connect() { + // Null args should be coerced to {} and fail at the *connect* step + // (binary doesn't exist), not at the validation step. 
+ let mgr = mgr_from( + r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/openab-mcp-test-stub-zzz" + } + } + }"#, + ); + let err = dispatch( + &mgr, + Action::Call { + server: "broken".into(), + tool: "read".into(), + arguments: Value::Null, + }, + ) + .await + .unwrap_err() + .to_string(); + assert!(err.contains("connect mcp server"), "got: {err}"); + assert!(!err.contains("must be a JSON object"), "got: {err}"); + } + #[tokio::test] async fn list_tools_propagates_connect_failure() { let mgr = mgr_from( @@ -219,14 +309,6 @@ mod tests { }, "describe_tool", ), - ( - Action::Call { - server: "fs".into(), - tool: "read".into(), - arguments: json!({}), - }, - "call", - ), (Action::Status { server: None }, "status"), ]; for (action, expected_name) in cases { From 2a859e6a4f70467338f8e547166f610078d5582d Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:51:26 +0000 Subject: [PATCH 24/98] fix(openab-agent/mcp): collapse let-params chain to satisfy cargo fmt The CallToolRequestParams chain fits in exactly 100 chars; rustfmt keeps it on one line. Burned a tick on a defensive break. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 850f2be27..5b3d456ec 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -117,8 +117,7 @@ async fn call_tool( .await .with_context(|| format!("connect mcp server {server:?}"))?; let peer = manager.arc_peer(server).await?; - let params = rmcp::model::CallToolRequestParams::new(tool.to_string()) - .with_arguments(args_map); + let params = rmcp::model::CallToolRequestParams::new(tool.to_string()).with_arguments(args_map); let result = peer .call_tool(params) .await From 6653008ab5e5e003cc1c0896e25065b31b5db55c Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:13:30 +0000 Subject: [PATCH 25/98] feat(openab-agent/mcp): describe_tool + status actions complete phase 1 surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit describe_tool returns full input_schema for one tool (list+filter via list_all_tools — MCP spec has no single-tool query). status reports per-server {name, status, transport, last_error} with optional name filter; surfaces Failed's embedded error as last_error. Extract fetch_tools helper shared by list_tools + describe_tool — this is the natural insertion point for the planned tools_cache. --- openab-agent/src/mcp/meta_tool.rs | 220 ++++++++++++++++++++++-------- 1 file changed, 165 insertions(+), 55 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 5b3d456ec..8f8503a3e 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -1,11 +1,8 @@ //! Single `mcp` meta-tool the LLM sees. See ADR §5.2 + §5.3. //! -//! Phase 1 scope: action enum + dispatch wiring + the two no-IO actions -//! (`help`, `list_servers`). The IO-bearing actions (`list_tools`, -//! 
`describe_tool`, `call`, `status`) return a `not yet implemented` -//! error so the contract surface is visible to callers while the -//! `RunningService` borrow path lands in the next slice. The Phase 2 -//! `login` / `complete_login` actions land with the OAuth slice. +//! Phase 1 scope: action enum + dispatch wiring + all six Phase 1 actions +//! (`help`, `list_servers`, `list_tools`, `describe_tool`, `call`, `status`). +//! The Phase 2 `login` / `complete_login` actions land with the OAuth slice. use anyhow::{anyhow, Context, Result}; use serde::Deserialize; @@ -48,36 +45,16 @@ pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result Ok(json!(HELP)), Action::ListServers => Ok(list_servers(manager).await), Action::ListTools { server } => list_tools(manager, &server).await, + Action::DescribeTool { server, tool } => describe_tool(manager, &server, &tool).await, Action::Call { server, tool, arguments, } => call_tool(manager, &server, &tool, arguments).await, - other => Err(anyhow!("{}", not_implemented_msg(&other))), + Action::Status { server } => Ok(status(manager, server.as_deref()).await), } } -/// Error body for actions whose handler hasn't landed yet. Mentions the -/// requested action and the supported set so the LLM can recover by -/// falling back to the native `read` / `write` / `edit` / `bash` tools -/// instead of retrying the same action blindly. -fn not_implemented_msg(action: &Action) -> String { - let name = match action { - Action::Help => "help", - Action::ListServers => "list_servers", - Action::ListTools { .. } => "list_tools", - Action::DescribeTool { .. } => "describe_tool", - Action::Call { .. } => "call", - Action::Status { .. } => "status", - }; - format!( - "mcp action '{name}' is not yet implemented (phase 1 scaffold). \ - Currently supported: 'help', 'list_servers', 'list_tools', 'call'. \ - To complete your task right now, fall back to the native agent tools \ - (read, write, edit, bash)." 
- ) -} - const HELP: &str = "\ The `mcp` tool lets you talk to configured MCP servers. @@ -125,19 +102,24 @@ async fn call_tool( serde_json::to_value(&result).context("serialize CallToolResult") } -async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result { - // Lazy connect per ADR §5.3 — idempotent if already Connected. +/// Lazy-connect + list all tools on `server`. Shared by `list_tools` / +/// `describe_tool` (and the planned `tools_cache` on ServerHandle will plug +/// in here). The `Arc` clone lets the I/O `.await` run with +/// no runtime lock held. +async fn fetch_tools(manager: &McpRuntimeManager, server: &str) -> Result> { manager .connect(server) .await .with_context(|| format!("connect mcp server {server:?}"))?; let peer = manager.arc_peer(server).await?; - // Arc lets the I/O `.await` run with no runtime lock held. - let tools = peer - .list_all_tools() + peer.list_all_tools() .await - .with_context(|| format!("list_all_tools on {server:?}"))?; - let entries: Vec = tools + .with_context(|| format!("list_all_tools on {server:?}")) +} + +async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result { + let entries: Vec = fetch_tools(manager, server) + .await? .into_iter() .map(|t| { json!({ @@ -149,6 +131,43 @@ async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result Ok(Value::Array(entries)) } +async fn describe_tool(manager: &McpRuntimeManager, server: &str, tool: &str) -> Result { + // Progressive disclosure (ADR §5.2): `list_tools` returns compact + // `{name, description}`; this action returns the full `input_schema` + // for one tool. MCP has no single-tool query, so we list + filter. + let tool_def = fetch_tools(manager, server) + .await? 
+ .into_iter() + .find(|t| t.name.as_ref() == tool) + .ok_or_else(|| anyhow!("no tool {tool:?} on mcp server {server:?}"))?; + Ok(json!({ + "name": tool_def.name, + "description": tool_def.description, + "input_schema": tool_def.input_schema, + })) +} + +async fn status(manager: &McpRuntimeManager, filter: Option<&str>) -> Value { + let snapshot = manager.snapshot().await; + let entries: Vec = snapshot + .into_iter() + .filter(|(name, _, _)| filter.map_or(true, |f| f == name.as_str())) + .map(|(name, status, transport)| { + let last_error = match &status { + ServerStatus::Failed(msg) => Some(msg.clone()), + _ => None, + }; + json!({ + "name": name, + "status": status_label(&status), + "transport": transport, + "last_error": last_error, + }) + }) + .collect(); + Value::Array(entries) +} + async fn list_servers(manager: &McpRuntimeManager) -> Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot @@ -298,29 +317,120 @@ mod tests { } #[tokio::test] - async fn unimplemented_actions_name_themselves_and_guide_fallback() { - let mgr = mgr_from(r#"{"mcpServers":{}}"#); - let cases = [ - ( - Action::DescribeTool { - server: "fs".into(), - tool: "read".into(), - }, - "describe_tool", - ), - (Action::Status { server: None }, "status"), - ]; - for (action, expected_name) in cases { - let err = dispatch(&mgr, action).await.unwrap_err().to_string(); - assert!(err.contains(expected_name), "missing action name: {err}"); - assert!(err.contains("not yet implemented"), "got: {err}"); - assert!( - err.contains("read, write, edit, bash"), - "missing fallback: {err}" - ); + async fn describe_tool_propagates_connect_failure() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/path/openab-mcp-test-stub-zzz" + } + } + }"#, + ); + let err = dispatch( + &mgr, + Action::DescribeTool { + server: "broken".into(), + tool: "read".into(), + }, + ) + .await + .unwrap_err() + .to_string(); + assert!(err.contains("connect 
mcp server"), "got: {err}"); + } + + #[tokio::test] + async fn status_lists_each_server_with_null_last_error_by_default() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + } + }"#, + ); + let result = dispatch(&mgr, Action::Status { server: None }).await.unwrap(); + let entries = result.as_array().unwrap(); + assert_eq!(entries.len(), 2); + for e in entries { + assert_eq!(e["status"], "disconnected"); + assert!(e["last_error"].is_null()); } } + #[tokio::test] + async fn status_filter_by_server_returns_single_entry() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + } + }"#, + ); + let result = dispatch( + &mgr, + Action::Status { + server: Some("fs".into()), + }, + ) + .await + .unwrap(); + let entries = result.as_array().unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0]["name"], "fs"); + assert_eq!(entries[0]["transport"], "stdio"); + } + + #[tokio::test] + async fn status_unknown_filter_returns_empty_array() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" } + } + }"#, + ); + let result = dispatch( + &mgr, + Action::Status { + server: Some("nope".into()), + }, + ) + .await + .unwrap(); + assert!(result.as_array().unwrap().is_empty()); + } + + #[tokio::test] + async fn status_surfaces_last_error_after_failed_connect() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/path/openab-mcp-test-stub-zzz" + } + } + }"#, + ); + let _ = dispatch( + &mgr, + Action::ListTools { + server: "broken".into(), + }, + ) + .await; + let result = dispatch(&mgr, Action::Status { server: None }).await.unwrap(); + let entries = result.as_array().unwrap(); + assert_eq!(entries.len(), 1); + 
assert_eq!(entries[0]["status"], "failed"); + let last_error = entries[0]["last_error"].as_str().unwrap(); + assert!(last_error.contains("spawn"), "got: {last_error}"); + } + #[test] fn action_deserializes_from_meta_tool_payload() { let payload = json!({ From f36fb77b350091bfb3b80688dd1d1621dadf08d9 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:14:54 +0000 Subject: [PATCH 26/98] fix(openab-agent/mcp): break dispatch().await.unwrap() chain past chain_width --- openab-agent/src/mcp/meta_tool.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 8f8503a3e..ad702dd46 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -351,7 +351,9 @@ mod tests { } }"#, ); - let result = dispatch(&mgr, Action::Status { server: None }).await.unwrap(); + let result = dispatch(&mgr, Action::Status { server: None }) + .await + .unwrap(); let entries = result.as_array().unwrap(); assert_eq!(entries.len(), 2); for e in entries { @@ -423,7 +425,9 @@ mod tests { }, ) .await; - let result = dispatch(&mgr, Action::Status { server: None }).await.unwrap(); + let result = dispatch(&mgr, Action::Status { server: None }) + .await + .unwrap(); let entries = result.as_array().unwrap(); assert_eq!(entries.len(), 1); assert_eq!(entries[0]["status"], "failed"); From 6a3e9e0c79003b6ff1e9a29803ae9f3d4730b0b9 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:21:49 +0000 Subject: [PATCH 27/98] fix(openab-agent/mcp): use is_none_or per clippy::unnecessary_map_or --- openab-agent/src/mcp/meta_tool.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index ad702dd46..2588c97bc 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -151,7 +151,7 @@ async fn status(manager: &McpRuntimeManager, filter: Option<&str>) -> 
Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot .into_iter() - .filter(|(name, _, _)| filter.map_or(true, |f| f == name.as_str())) + .filter(|(name, _, _)| filter.is_none_or(|f| f == name.as_str())) .map(|(name, status, transport)| { let last_error = match &status { ServerStatus::Failed(msg) => Some(msg.clone()), From 6e2b73683c49b6def9b939be2b8fa3d99fc72fca Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:44:05 +0000 Subject: [PATCH 28/98] feat(openab-agent/mcp): wire mcp meta-tool into agent dispatch loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaces the `mcp` action enum to the LLM via a new ToolDef and routes calls through McpRuntimeManager when configured. AcpServer constructs the manager once at startup (warn-and-default on config parse failure) and clones the cheap Arc-backed handle into each session's Agent. Tools.rs stays stateless and feature-flag-free — the mcp routing arm lives in Agent::execute_tool_call where it has access to the manager. A shared MCP_TOOL_NAME const keeps mcp_tool_def() and the dispatch arm in sync. 
--- openab-agent/src/acp.rs | 13 +++++- openab-agent/src/agent.rs | 71 +++++++++++++++++++++++++++---- openab-agent/src/mcp/meta_tool.rs | 2 - openab-agent/src/mcp/mod.rs | 58 ++++++++++++++++++++++++- 4 files changed, 132 insertions(+), 12 deletions(-) diff --git a/openab-agent/src/acp.rs b/openab-agent/src/acp.rs index 38054f25d..8529f7e82 100644 --- a/openab-agent/src/acp.rs +++ b/openab-agent/src/acp.rs @@ -1,5 +1,7 @@ use crate::agent::Agent; use crate::llm::AnthropicProvider; +#[cfg(feature = "mcp")] +use crate::mcp::{self, McpRuntimeManager}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::collections::HashMap; @@ -35,6 +37,8 @@ pub struct AcpServer { // TODO(v0.2): add session TTL and periodic cleanup to prevent OOM sessions: HashMap, working_dir: String, + #[cfg(feature = "mcp")] + mcp_manager: McpRuntimeManager, } impl AcpServer { @@ -44,6 +48,8 @@ impl AcpServer { working_dir: std::env::current_dir() .map(|p| p.to_string_lossy().to_string()) .unwrap_or_else(|_| "/tmp".to_string()), + #[cfg(feature = "mcp")] + mcp_manager: mcp::load_runtime_or_warn(), } } @@ -154,7 +160,12 @@ impl AcpServer { } }; - let agent = Agent::new_boxed(provider, self.working_dir.clone()); + let agent = Agent::new_boxed( + provider, + self.working_dir.clone(), + #[cfg(feature = "mcp")] + Some(self.mcp_manager.clone()), + ); self.sessions.insert(session_id.clone(), agent); let resp = JsonRpcResponse { jsonrpc: "2.0", diff --git a/openab-agent/src/agent.rs b/openab-agent/src/agent.rs index b4a32d722..ad09eb63a 100644 --- a/openab-agent/src/agent.rs +++ b/openab-agent/src/agent.rs @@ -1,14 +1,18 @@ use anyhow::Result; +#[cfg(feature = "mcp")] +use serde::Deserialize; use std::path::PathBuf; use tracing::{debug, info}; use crate::llm::{ContentBlock, LlmEvent, LlmProvider, Message, ToolDef}; +#[cfg(feature = "mcp")] +use crate::mcp::{self, McpRuntimeManager}; use crate::skills; use crate::tools; const SYSTEM_PROMPT: &str = r#"You are openab-agent, a coding 
assistant. You help users by reading, writing, and editing files, and running shell commands. -You have 4 tools available: +You have these tools available: - read: Read file contents or list a directory - write: Create or overwrite a file - edit: Replace a string in a file (first occurrence) @@ -16,6 +20,12 @@ You have 4 tools available: Be direct and concise. Execute tasks immediately rather than explaining what you would do. When you need to understand code, read the relevant files first."#; +#[cfg(feature = "mcp")] +const MCP_SYSTEM_PROMPT_APPENDIX: &str = "\n\nAdditional tool:\n\ + - mcp: Talk to configured MCP servers. Always call `mcp(action=\"help\")` \ + first to learn the action surface, then `mcp(action=\"list_servers\")` to see \ + what's configured before calling tools."; + const MAX_TOOL_LOOPS: usize = 50; /// Maximum number of messages to keep in context. When exceeded, oldest /// messages (excluding the first user message) are dropped. @@ -27,45 +37,70 @@ pub struct Agent { working_dir: PathBuf, system_prompt: String, tools: Vec, + #[cfg(feature = "mcp")] + mcp_manager: Option, } impl Agent { #[cfg(test)] pub fn new(provider: impl LlmProvider + 'static, working_dir: String) -> Self { - let system_prompt = Self::build_system_prompt(&working_dir); + let system_prompt = Self::build_system_prompt(&working_dir, false); Self { provider: Box::new(provider), messages: Vec::new(), working_dir: PathBuf::from(working_dir), system_prompt, tools: tools::tool_definitions(), + #[cfg(feature = "mcp")] + mcp_manager: None, } } - pub fn new_boxed(provider: Box, working_dir: String) -> Self { - let system_prompt = Self::build_system_prompt(&working_dir); + pub fn new_boxed( + provider: Box, + working_dir: String, + #[cfg(feature = "mcp")] mcp_manager: Option, + ) -> Self { + #[cfg(feature = "mcp")] + let has_mcp = mcp_manager.is_some(); + #[cfg(not(feature = "mcp"))] + let has_mcp = false; + let system_prompt = Self::build_system_prompt(&working_dir, has_mcp); + let 
mut tools = tools::tool_definitions(); + #[cfg(feature = "mcp")] + if mcp_manager.is_some() { + tools.push(mcp::mcp_tool_def()); + } Self { provider, messages: Vec::new(), working_dir: PathBuf::from(working_dir), system_prompt, - tools: tools::tool_definitions(), + tools, + #[cfg(feature = "mcp")] + mcp_manager, } } /// Run the agent with a user prompt, executing tool calls until completion. /// Returns the final text response. - fn build_system_prompt(working_dir: &str) -> String { + #[cfg_attr(not(feature = "mcp"), allow(unused_variables))] + fn build_system_prompt(working_dir: &str, mcp_enabled: bool) -> String { let wd = std::path::Path::new(working_dir); let agents_md = wd.join("AGENTS.md"); let custom = std::fs::read_to_string(&agents_md).unwrap_or_default(); - let base = if custom.is_empty() { + let mut base = if custom.is_empty() { SYSTEM_PROMPT.to_string() } else { format!("{}\n\n---\n\n{}", custom.trim(), SYSTEM_PROMPT) }; + #[cfg(feature = "mcp")] + if mcp_enabled { + base.push_str(MCP_SYSTEM_PROMPT_APPENDIX); + } + let discovered = skills::discover_skills(wd); if discovered.is_empty() { base @@ -140,7 +175,7 @@ impl Agent { let mut tool_results: Vec = Vec::new(); for (id, name, input) in &tool_calls { info!("executing tool: {name}"); - let result = tools::execute_tool(name, input, &self.working_dir).await; + let result = self.execute_tool_call(name, input).await; match result { Ok(output) => { tool_results.push(ContentBlock::ToolResult { @@ -184,6 +219,26 @@ impl Agent { } } + /// Route the `mcp` meta-tool to the MCP runtime when configured; + /// everything else goes to the stateless `tools::execute_tool`. Keeping + /// the routing here (rather than inside `tools.rs`) lets `tools.rs` stay + /// stateless and free of MCP/feature plumbing. 
+ async fn execute_tool_call(&self, name: &str, input: &serde_json::Value) -> Result { + #[cfg(feature = "mcp")] + if name == mcp::MCP_TOOL_NAME { + let Some(manager) = self.mcp_manager.as_ref() else { + return Err(anyhow::anyhow!( + "mcp tool invoked but no McpRuntimeManager configured" + )); + }; + let action = mcp::meta_tool::Action::deserialize(input) + .map_err(|e| anyhow::anyhow!("invalid mcp action payload: {e}"))?; + let value = mcp::meta_tool::dispatch(manager, action).await?; + return Ok(serde_json::to_string(&value)?); + } + tools::execute_tool(name, input, &self.working_dir).await + } + async fn call_llm(&self) -> Result> { self.provider .chat(&self.system_prompt, &self.messages, &self.tools) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 2588c97bc..557badf4c 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -12,7 +12,6 @@ use super::runtime::{McpRuntimeManager, ServerStatus}; /// Deserialized form of the meta-tool's input JSON (ADR §5.2). The LLM /// sends `{ "action": "...", ... }`; `tag = "action"` routes by that field. -#[allow(dead_code)] // wired into agent.rs execute_tool dispatch in the next slice #[derive(Debug, Deserialize)] #[serde(tag = "action", rename_all = "snake_case")] pub enum Action { @@ -39,7 +38,6 @@ pub enum Action { /// Entry point — the LLM tool dispatcher hands us a deserialized `Action` /// and we return the JSON payload that becomes the tool result. 
-#[allow(dead_code)] // wired into agent.rs execute_tool dispatch in the next slice pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result { match action { Action::Help => Ok(json!(HELP)), diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index b7b7b8f78..2bd9dadee 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -4,8 +4,52 @@ pub mod config; pub mod meta_tool; pub mod runtime; +use serde_json::json; + +use crate::llm::ToolDef; use config::{McpConfig, ServerConfig}; -use runtime::McpRuntimeManager; + +pub use runtime::McpRuntimeManager; + +/// Shared tool name used by `mcp_tool_def()` and the agent dispatch arm — +/// keeps the implicit contract between the two call sites explicit. +pub const MCP_TOOL_NAME: &str = "mcp"; + +/// The single `mcp` tool definition the LLM sees (ADR §5.2). The schema is +/// intentionally permissive on the per-action fields — the LLM should call +/// `mcp(action="help")` first to learn the action-specific contract. +pub fn mcp_tool_def() -> ToolDef { + ToolDef { + name: MCP_TOOL_NAME.to_string(), + description: "Talk to configured MCP servers. Call with \ + {action: 'help'} first to see the available actions \ + (help, list_servers, list_tools, describe_tool, call, status)." 
+ .to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["help", "list_servers", "list_tools", + "describe_tool", "call", "status"], + "description": "Which meta-tool action to invoke" + }, + "server": { + "type": "string", + "description": "Server name (required by list_tools / describe_tool / call; optional filter for status)" + }, + "tool": { + "type": "string", + "description": "Tool name on the server (required by describe_tool / call)" + }, + "arguments": { + "description": "Tool arguments for call — JSON object, or null/omitted for no-arg tools" + } + }, + "required": ["action"] + }), + } +} fn load_config_or_exit() -> McpConfig { McpConfig::load().unwrap_or_else(|e| { @@ -14,6 +58,18 @@ fn load_config_or_exit() -> McpConfig { }) } +/// Construct an `McpRuntimeManager` from on-disk config, falling back to an +/// empty manager (with a `tracing::warn!`) on parse failure. Long-running +/// servers (ACP, future HTTP) call this so a malformed `mcp.json` cannot +/// kill the host process — CLI subcommands use `load_config_or_exit` instead. +pub fn load_runtime_or_warn() -> McpRuntimeManager { + let cfg = McpConfig::load().unwrap_or_else(|e| { + tracing::warn!("mcp config failed to load, starting with no servers: {e:#}"); + McpConfig::default() + }); + McpRuntimeManager::from_config(cfg) +} + /// `openab-agent mcp list [--resolve]`. /// /// Default: print configs verbatim (`${env:VAR}` placeholders kept as-is) so From 6e08c284102c4a84dc9348fcfba96f7a277d2612 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:45:33 +0000 Subject: [PATCH 29/98] fix(openab-agent/mcp): silence unused_mut on feature-off build The `mut` on `tools` and `base` is consumed only inside `#[cfg(feature = "mcp")]` blocks, so the no-feature build saw them as unused. Gate `allow(unused_mut)` behind `cfg_attr(not(feature = "mcp"), ...)` so both builds stay clean. 
--- openab-agent/src/agent.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openab-agent/src/agent.rs b/openab-agent/src/agent.rs index ad09eb63a..419e91f72 100644 --- a/openab-agent/src/agent.rs +++ b/openab-agent/src/agent.rs @@ -66,6 +66,7 @@ impl Agent { #[cfg(not(feature = "mcp"))] let has_mcp = false; let system_prompt = Self::build_system_prompt(&working_dir, has_mcp); + #[cfg_attr(not(feature = "mcp"), allow(unused_mut))] let mut tools = tools::tool_definitions(); #[cfg(feature = "mcp")] if mcp_manager.is_some() { @@ -90,6 +91,7 @@ impl Agent { let agents_md = wd.join("AGENTS.md"); let custom = std::fs::read_to_string(&agents_md).unwrap_or_default(); + #[cfg_attr(not(feature = "mcp"), allow(unused_mut))] let mut base = if custom.is_empty() { SYSTEM_PROMPT.to_string() } else { From a0ebd5f4d2b3ee37f460c622814d93321c8bfa20 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:53:28 +0000 Subject: [PATCH 30/98] fix(openab-agent/acp): serialize env-var-touching tests with Mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `test_session_new` and `test_session_new_missing_key` both mutate `ANTHROPIC_API_KEY`. cargo runs tests in parallel, so a `set` from one thread can be observed by the other before the first reads it. The pre-existing race was timing-tight enough to pass historically; adding `mcp::load_runtime_or_warn()` to `AcpServer::new()` widened the gap between set and read enough to flip the race deterministic. Guarding both tests with a shared `Mutex<()>` serializes them without adding a `serial_test` dep. Poisoned guard is recovered via `into_inner()` — we don't care if a prior assertion panicked. 
--- openab-agent/src/acp.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/openab-agent/src/acp.rs b/openab-agent/src/acp.rs index 8529f7e82..3556b3571 100644 --- a/openab-agent/src/acp.rs +++ b/openab-agent/src/acp.rs @@ -256,6 +256,13 @@ impl AcpServer { #[cfg(test)] mod tests { use super::*; + use std::sync::Mutex; + + /// Serializes tests that mutate process-global env vars (notably + /// `ANTHROPIC_API_KEY`). Without this, `test_session_new` and + /// `test_session_new_missing_key` race on the same key when run in + /// parallel — set/remove from one thread is observed by the other. + static ENV_LOCK: Mutex<()> = Mutex::new(()); #[test] fn test_initialize_response() { @@ -270,6 +277,7 @@ mod tests { #[test] fn test_session_new() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); // Set a fake key so from_env() succeeds in CI unsafe { std::env::set_var("ANTHROPIC_API_KEY", "test-key") }; let mut server = AcpServer::new(); @@ -282,6 +290,7 @@ mod tests { #[test] fn test_session_new_missing_key() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); // Ensure no OAuth token exists either let auth_path = std::path::PathBuf::from(std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string())) From f405646b7aa0137179db33ff39dbd1e53c6ff027 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:04:24 +0000 Subject: [PATCH 31/98] refactor(openab-agent/mcp): apply Mira's Phase 1 polish review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups from Mira's Tick 24 review: 1. `load_runtime_or_warn()` now returns `Option` — `None` when `cfg.servers` is empty so callers skip the entire MCP path (saves system-prompt tokens; LLM doesn't see an empty tool surface and try to use it). `AcpServer.mcp_manager` shape follows. 2. 
Replaced `let mut + push_str` on `base` with shadowing via `#[cfg(feature = "mcp")] let base = ...` — drops the `cfg_attr(allow(unused_mut))` markers entirely. The `tools` Vec uses two cfg arms (a single shared `let mut t = ...` would still trigger `unused_mut` on no-feature builds because the only write is cfg-gated). 3. Reordered the system-prompt appendix to lead with `list_servers` instead of `help`. Saves the LLM one tool-call round-trip when the user's request requires picking a configured server. --- openab-agent/src/acp.rs | 4 ++-- openab-agent/src/agent.rs | 35 +++++++++++++++++++++-------------- openab-agent/src/mcp/mod.rs | 18 ++++++++++++------ 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/openab-agent/src/acp.rs b/openab-agent/src/acp.rs index 3556b3571..5d7f4c412 100644 --- a/openab-agent/src/acp.rs +++ b/openab-agent/src/acp.rs @@ -38,7 +38,7 @@ pub struct AcpServer { sessions: HashMap, working_dir: String, #[cfg(feature = "mcp")] - mcp_manager: McpRuntimeManager, + mcp_manager: Option, } impl AcpServer { @@ -164,7 +164,7 @@ impl AcpServer { provider, self.working_dir.clone(), #[cfg(feature = "mcp")] - Some(self.mcp_manager.clone()), + self.mcp_manager.clone(), ); self.sessions.insert(session_id.clone(), agent); let resp = JsonRpcResponse { diff --git a/openab-agent/src/agent.rs b/openab-agent/src/agent.rs index 419e91f72..63f240d61 100644 --- a/openab-agent/src/agent.rs +++ b/openab-agent/src/agent.rs @@ -22,9 +22,10 @@ Be direct and concise. Execute tasks immediately rather than explaining what you #[cfg(feature = "mcp")] const MCP_SYSTEM_PROMPT_APPENDIX: &str = "\n\nAdditional tool:\n\ - - mcp: Talk to configured MCP servers. Always call `mcp(action=\"help\")` \ - first to learn the action surface, then `mcp(action=\"list_servers\")` to see \ - what's configured before calling tools."; + - mcp: Talk to configured MCP servers. 
Call `mcp(action=\"list_servers\")` \ + to see what's configured, then `mcp(action=\"list_tools\", server=...)` to \ + discover per-server tools. Use `mcp(action=\"help\")` only if action shapes \ + are unclear."; const MAX_TOOL_LOOPS: usize = 50; /// Maximum number of messages to keep in context. When exceeded, oldest @@ -66,12 +67,16 @@ impl Agent { #[cfg(not(feature = "mcp"))] let has_mcp = false; let system_prompt = Self::build_system_prompt(&working_dir, has_mcp); - #[cfg_attr(not(feature = "mcp"), allow(unused_mut))] - let mut tools = tools::tool_definitions(); #[cfg(feature = "mcp")] - if mcp_manager.is_some() { - tools.push(mcp::mcp_tool_def()); - } + let tools = { + let mut t = tools::tool_definitions(); + if mcp_manager.is_some() { + t.push(mcp::mcp_tool_def()); + } + t + }; + #[cfg(not(feature = "mcp"))] + let tools = tools::tool_definitions(); Self { provider, messages: Vec::new(), @@ -85,23 +90,25 @@ impl Agent { /// Run the agent with a user prompt, executing tool calls until completion. /// Returns the final text response. 
- #[cfg_attr(not(feature = "mcp"), allow(unused_variables))] fn build_system_prompt(working_dir: &str, mcp_enabled: bool) -> String { + #[cfg(not(feature = "mcp"))] + let _ = mcp_enabled; let wd = std::path::Path::new(working_dir); let agents_md = wd.join("AGENTS.md"); let custom = std::fs::read_to_string(&agents_md).unwrap_or_default(); - #[cfg_attr(not(feature = "mcp"), allow(unused_mut))] - let mut base = if custom.is_empty() { + let base = if custom.is_empty() { SYSTEM_PROMPT.to_string() } else { format!("{}\n\n---\n\n{}", custom.trim(), SYSTEM_PROMPT) }; #[cfg(feature = "mcp")] - if mcp_enabled { - base.push_str(MCP_SYSTEM_PROMPT_APPENDIX); - } + let base = if mcp_enabled { + format!("{base}{MCP_SYSTEM_PROMPT_APPENDIX}") + } else { + base + }; let discovered = skills::discover_skills(wd); if discovered.is_empty() { diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 2bd9dadee..55f210c16 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -58,16 +58,22 @@ fn load_config_or_exit() -> McpConfig { }) } -/// Construct an `McpRuntimeManager` from on-disk config, falling back to an -/// empty manager (with a `tracing::warn!`) on parse failure. Long-running -/// servers (ACP, future HTTP) call this so a malformed `mcp.json` cannot -/// kill the host process — CLI subcommands use `load_config_or_exit` instead. -pub fn load_runtime_or_warn() -> McpRuntimeManager { +/// Construct an `McpRuntimeManager` from on-disk config — returns `None` +/// when no servers are configured so callers can skip the entire MCP path +/// (saves system-prompt tokens + keeps the LLM from hallucinating an empty +/// tool surface). Parse failure falls back to `None` with a `tracing::warn!`. +/// Long-running servers (ACP, future HTTP) call this; CLI subcommands use +/// `load_config_or_exit` instead. 
+pub fn load_runtime_or_warn() -> Option { let cfg = McpConfig::load().unwrap_or_else(|e| { tracing::warn!("mcp config failed to load, starting with no servers: {e:#}"); McpConfig::default() }); - McpRuntimeManager::from_config(cfg) + if cfg.servers.is_empty() { + None + } else { + Some(McpRuntimeManager::from_config(cfg)) + } } /// `openab-agent mcp list [--resolve]`. From 9aa51a13166ce23eb8880149ac01fc62b3172b8e Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:19:07 +0000 Subject: [PATCH 32/98] feat(openab-agent/mcp): wire anonymous Streamable HTTP transport MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 first slice (ADR §5.7, §6). HTTP servers without `oauth` now dial through rmcp's `StreamableHttpClientTransport::from_uri`; servers that DO declare `oauth` are rejected before the `Connecting` state transition so the runtime never advertises an attempt that wasn't made. The OAuth-protected reject path makes the gap between Phase 2-anon and Phase 2-auth explicit — `mcp login` lands in the next slice. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 102 ++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 24 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 5fa41c8d3..933fd23d5 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use rmcp::service::{RoleClient, RunningService}; -use rmcp::transport::{ConfigureCommandExt, TokioChildProcess}; +use rmcp::transport::{ConfigureCommandExt, StreamableHttpClientTransport, TokioChildProcess}; use rmcp::ServiceExt; use tokio::process::Command; use tokio::sync::RwLock; @@ -147,7 +147,8 @@ impl McpRuntimeManager { } /// Lazy-connect the named server (ADR §5.7). Idempotent if already - /// `Connected` with a live client. HTTP transport is Phase 2. 
+ /// `Connected` with a live client. HTTP servers requiring OAuth are + /// rejected until the Phase 2 auth slice lands (ADR §6). pub async fn connect(&self, name: &str) -> Result<()> { let dial = { let mut guard = self.handles.write().await; @@ -161,10 +162,21 @@ impl McpRuntimeManager { let dial = match resolved { ServerConfig::Stdio { command, args, env, .. - } => StdioDial { command, args, env }, - ServerConfig::Http { .. } => { - return Err(anyhow!("http transport lands in phase 2 (server {name:?})")); + } => Dial::Stdio { command, args, env }, + // Reject oauth-protected servers BEFORE the `Connecting` + // transition: we never attempted a handshake, so leaving + // status at `Disconnected` is the honest state. Status + // becomes `Failed` only when a dial was actually tried. + ServerConfig::Http { + oauth: Some(_), + url, + .. + } => { + return Err(anyhow!( + "oauth-protected http server {url:?} requires the auth slice (Phase 2 §6)" + )); } + ServerConfig::Http { url, .. } => Dial::Http { url }, }; handle.status = ServerStatus::Connecting; dial @@ -197,24 +209,41 @@ impl McpRuntimeManager { } } -struct StdioDial { - command: String, - args: Vec, - env: HashMap, +/// Per-transport dial parameters, extracted under the manager's write lock +/// then dialed without holding the lock. Flat (no nested `*Dial` structs) +/// because two variants don't warrant a dispatch enum. 
+enum Dial { + Stdio { + command: String, + args: Vec, + env: HashMap, + }, + Http { + url: String, + }, } -impl StdioDial { +impl Dial { async fn run(self) -> Result> { - let Self { command, args, env } = self; - let cmd = Command::new(&command).configure(|c| { - c.args(&args); - c.envs(&env); - }); - let transport = TokioChildProcess::new(cmd) - .with_context(|| format!("spawn mcp child process {command:?}"))?; - ().serve(transport) - .await - .with_context(|| format!("mcp handshake with {command:?}")) + match self { + Dial::Stdio { command, args, env } => { + let cmd = Command::new(&command).configure(|c| { + c.args(&args); + c.envs(&env); + }); + let transport = TokioChildProcess::new(cmd) + .with_context(|| format!("spawn mcp child process {command:?}"))?; + ().serve(transport) + .await + .with_context(|| format!("mcp handshake with {command:?}")) + } + Dial::Http { url } => { + let transport = StreamableHttpClientTransport::from_uri(url.as_str()); + ().serve(transport) + .await + .with_context(|| format!("mcp handshake with {url:?}")) + } + } } } @@ -269,20 +298,45 @@ mod tests { } #[tokio::test] - async fn connect_http_returns_phase2_error() { + async fn connect_http_with_oauth_defers_to_auth_slice() { let json = r#"{ "mcpServers": { - "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + } } }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); - assert!(err.contains("phase 2"), "expected 'phase 2' in {err}"); - // Status not advanced past Disconnected for unsupported transports. + assert!(err.contains("oauth"), "expected 'oauth' in {err}"); + // OAuth rejection happens BEFORE the Connecting transition, so the + // server remains Disconnected — no dial was attempted. 
assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); } + #[tokio::test] + async fn connect_http_anonymous_to_dead_address_records_failed() { + // 127.0.0.1:1 is a TCP port that no MCP server will ever bind. The + // handshake `.serve()` future fails fast at the connect() syscall, + // so this test stays hermetic — no network reachability assumed. + let json = r#"{ + "mcpServers": { + "dead": { "type": "http", "url": "http://127.0.0.1:1/mcp" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = mgr.connect("dead").await.unwrap_err().to_string(); + assert!(err.contains("handshake"), "expected 'handshake' in {err}"); + match &mgr.statuses().await[0].1 { + ServerStatus::Failed(_) => {} + other => panic!("expected Failed, got {other:?}"), + } + } + #[tokio::test] async fn connect_to_missing_binary_records_failed() { let json = r#"{ From 2a69bf9d9933c4cec69b48a53bf3dd2e1945aa4a Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:25:42 +0000 Subject: [PATCH 33/98] =?UTF-8?q?feat(openab-agent/auth):=20namespaced=20T?= =?UTF-8?q?okenStore=20+=20fsync=20(ADR=20=C2=A76.1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 §6.1 foundation. `auth.json` switches from a bare `TokenStore` to `HashMap<String, TokenStore>` so MCP server credentials can sit alongside the existing Codex slot (`mcp:<server>` vs `codex`). Legacy single-tenant files migrate transparently on read (discriminated by the top-level `access_token` key); the on-disk shape rewrites to the new layout on the next save. Codex login flows keep their existing public API — `load_tokens`/`save_tokens` now route through the codex namespace internally. `fsync(2)` lands on every write per the refresh-token rotation race contract: without it, a Spot interruption between local write and S3 sync could restore a revoked refresh token from durable storage.
Public helpers for the MCP path (`load_/save_/remove_namespaced_token`) are feature-gated and `#[allow(dead_code)]` until the `mcp/oauth.rs` slice lands. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 173 +++++++++++++++++++++++++++++++++++---- 1 file changed, 158 insertions(+), 15 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 385ccede9..f1abacc76 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -2,11 +2,16 @@ use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; +use std::collections::HashMap; use std::io::{BufRead, Write}; use std::net::TcpListener; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; +/// Namespace key for the existing Codex single-tenant credential. +/// Lives next to future `mcp:` entries inside `auth.json`. +const CODEX_NAMESPACE: &str = "codex"; + const REFRESH_SKEW_SECONDS: u64 = 120; const CODEX_AUTHORIZE_URL: &str = "https://auth.openai.com/oauth/authorize"; @@ -42,23 +47,36 @@ fn auth_path() -> PathBuf { .join("auth.json") } -pub fn load_tokens() -> Result { - let path = auth_path(); - let data = std::fs::read_to_string(&path).map_err(|_| { - anyhow!( - "No credentials found at {}. Run `openab-agent auth codex-oauth` first.", - path.display() - ) - })?; - serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}")) +/// Read the `auth.json` map, transparently migrating a legacy single-tenant +/// Codex token file into the new namespaced shape. The migrated map is held +/// in-memory only; the file is rewritten in the new shape on the next save. +/// +/// Discriminates by the top-level `access_token` key — present means the +/// file is the legacy `TokenStore` shape, absent means the new namespaced +/// map. A single JSON parse gives accurate error context either way. 
+fn read_auth_file(path: &Path) -> Result> { + let data = std::fs::read_to_string(path)?; + let value: serde_json::Value = + serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}"))?; + if value.get("access_token").is_some() { + let legacy: TokenStore = serde_json::from_value(value) + .map_err(|e| anyhow!("Invalid auth.json (legacy format): {e}"))?; + let mut map = HashMap::new(); + map.insert(CODEX_NAMESPACE.to_string(), legacy); + return Ok(map); + } + serde_json::from_value(value).map_err(|e| anyhow!("Invalid auth.json: {e}")) } -fn save_tokens(store: &TokenStore) -> Result<()> { - let path = auth_path(); +/// Overwrite `auth.json` in place with the new map (truncate + write; not atomic). +/// `fsync(2)` after write satisfies the ADR §6.1 refresh-token rotation contract — +/// without it, a Spot interruption between local write and S3 sync would restore a +/// revoked refresh token from durable storage on the next task start. +fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { if let Some(dir) = path.parent() { std::fs::create_dir_all(dir)?; } - let data = serde_json::to_string_pretty(store)?; + let data = serde_json::to_string_pretty(map)?; #[cfg(unix)] { use std::fs::OpenOptions; @@ -69,16 +87,87 @@ fn save_tokens(store: &TokenStore) -> Result<()> { .create(true) .truncate(true) .mode(0o600) - .open(&path)?; + .open(path)?; file.write_all(data.as_bytes())?; + file.sync_all()?; } #[cfg(not(unix))] { - std::fs::write(&path, &data)?; + std::fs::write(path, &data)?; } Ok(()) } +pub fn load_tokens() -> Result { + let path = auth_path(); + let map = read_auth_file(&path).map_err(|_| { + anyhow!( + "No credentials found at {}. Run `openab-agent auth codex-oauth` first.", + path.display() + ) + })?; + map.get(CODEX_NAMESPACE).cloned().ok_or_else(|| { + anyhow!( + "No codex credentials in {}.
Run `openab-agent auth codex-oauth` first.", + path.display() + ) + }) +} + +fn save_tokens(store: &TokenStore) -> Result<()> { + let path = auth_path(); + let mut map = read_auth_file(&path).unwrap_or_default(); + map.insert(CODEX_NAMESPACE.to_string(), store.clone()); + write_auth_file(&path, &map) +} + +/// Look up the credential at `key` (e.g. `mcp:linear`). Returns the codex +/// entry for `key = "codex"`, but prefer `load_tokens()` for that path — +/// this helper exists for MCP server-namespaced lookups (ADR §6.1). +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) +pub fn load_namespaced_token(key: &str) -> Result { + let path = auth_path(); + let map = read_auth_file(&path) + .map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + map.get(key) + .cloned() + .ok_or_else(|| anyhow!("no credentials stored for {key:?}")) +} + +/// Insert or replace the credential at `key`, preserving all other entries. +/// Read-modify-write on a single file: callers in the same process must +/// serialize themselves (the lifecycle manager already does per ADR §5.7). +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) +pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { + let path = auth_path(); + let mut map = read_auth_file(&path).unwrap_or_default(); + map.insert(key.to_string(), store.clone()); + write_auth_file(&path, &map) +} + +/// Remove the credential at `key`. Idempotent — missing key is not an +/// error. If the map becomes empty, the file is deleted so `mcp doctor` +/// can report "no credentials" instead of "empty file". 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp logout / revoked-refresh recovery) +pub fn remove_namespaced_token(key: &str) -> Result<()> { + let path = auth_path(); + let mut map = match read_auth_file(&path) { + Ok(m) => m, + Err(_) => return Ok(()), + }; + if map.remove(key).is_none() { + return Ok(()); + } + if map.is_empty() { + let _ = std::fs::remove_file(&path); + return Ok(()); + } + write_auth_file(&path, &map) +} + fn is_expired(store: &TokenStore) -> bool { let now = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -535,4 +624,58 @@ mod tests { let expected = URL_SAFE_NO_PAD.encode(Sha256::digest(verifier.as_bytes())); assert_eq!(challenge, expected); } + + #[test] + fn read_auth_file_migrates_legacy_single_tenant_format() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let legacy = serde_json::to_string_pretty(&make_store(9_999_999_999)).unwrap(); + std::fs::write(&path, legacy).unwrap(); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.len(), 1); + assert_eq!( + map.get(CODEX_NAMESPACE).unwrap().access_token, + "test_access_token_value" + ); + } + + #[test] + fn read_auth_file_parses_new_namespaced_format() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), make_store(1)); + input.insert("mcp:linear".to_string(), make_store(2)); + write_auth_file(&path, &input).unwrap(); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.len(), 2); + assert_eq!(map.get("codex").unwrap().expires_at, 1); + assert_eq!(map.get("mcp:linear").unwrap().expires_at, 2); + } + + #[test] + fn write_auth_file_round_trips_through_disk() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("mcp:github".to_string(), make_store(42)); + write_auth_file(&path, &input).unwrap(); + let raw = 
std::fs::read_to_string(&path).unwrap(); + assert!(raw.contains("mcp:github")); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.get("mcp:github").unwrap().expires_at, 42); + } + + #[cfg(unix)] + #[test] + fn write_auth_file_creates_file_with_0600_mode() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), make_store(0)); + write_auth_file(&path, &input).unwrap(); + let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777; + assert_eq!(mode, 0o600, "expected 0600, got {mode:o}"); + } } From 06acde6ac330f4a7400e3bfb8754e83f312e6ba1 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:31:46 +0000 Subject: [PATCH 34/98] fix(openab-agent/auth): rustfmt break-after-= for long chained let MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI cargo fmt --check rejected the chain-break form (`let map = read_auth_file(&path)\n .map_err(...)`) — for a single-method chain that fits inline at 99 chars, rustfmt prefers breaking after the `=` and keeping the chain whole. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index f1abacc76..b83179e15 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -128,8 +128,8 @@ fn save_tokens(store: &TokenStore) -> Result<()> { #[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) pub fn load_namespaced_token(key: &str) -> Result { let path = auth_path(); - let map = read_auth_file(&path) - .map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + let map = + read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; map.get(key) .cloned() .ok_or_else(|| anyhow!("no credentials stored for {key:?}")) From 4c77595003b0b7e6b4f12ac5c33a6cda6732036f Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:46:43 +0000 Subject: [PATCH 35/98] =?UTF-8?q?feat(openab-agent/mcp):=20add=20OAuth=20p?= =?UTF-8?q?rovider=20catalog=20(ADR=20=C2=A76.2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Built-in ProviderSpec table + resolve() that hands callers a (spec, scopes) pair. Empty cfg.scopes falls back to spec defaults; non-empty replaces them entirely. Custom providers (§6.3) deferred — anything not in the built-in list errors out for now. Module-level #![allow(dead_code)] since the first prod caller is the §6.4 login flow slice; until then only the unit tests below exercise this code. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/mod.rs | 1 + openab-agent/src/mcp/oauth.rs | 139 ++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 openab-agent/src/mcp/oauth.rs diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 55f210c16..17884c9c1 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -2,6 +2,7 @@ pub mod config; pub mod meta_tool; +pub mod oauth; pub mod runtime; use serde_json::json; diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs new file mode 100644 index 000000000..e05f7d77e --- /dev/null +++ b/openab-agent/src/mcp/oauth.rs @@ -0,0 +1,139 @@ +//! OAuth provider catalog (ADR §6.2). Wiring into the rmcp Streamable HTTP +//! transport + agent-guided flows (§6.4) lands in subsequent slices; this +//! module is the data layer the login / refresh code will dispatch through. +//! +//! Scopes are stored as `&'static [&'static str]` so callers can join them +//! with the space delimiter the OAuth 2.1 spec mandates without owning a +//! `Vec`. Per-server overrides (`OAuthConfig.scopes`) replace the defaults +//! and pay for a `Vec` at the boundary. + +// The §6.4 login slice is the first prod caller — until then, every item +// here is reachable only via the unit tests below, so `cargo clippy +// --features mcp -- -D warnings` would flag them as dead. Module-scope +// allow rather than per-item once that slice lands. +#![allow(dead_code)] + +use anyhow::{anyhow, Result}; + +use super::config::OAuthConfig; + +/// Static description of a single OAuth provider — URLs + the loopback +/// redirect the §6.4 browser flow listens on. `default_scopes` is the +/// minimum set the agent will request when `oauth.scopes` is omitted +/// from the server config; per-server overrides win when present. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ProviderSpec { + pub authorize_url: &'static str, + pub token_url: &'static str, + pub callback: &'static str, + pub default_scopes: &'static [&'static str], +} + +/// Anthropic MCP (claude.ai). Scope list from ADR §6.2 — `org:create_api_key` +/// is the broadest grant; consumers should narrow via per-server overrides. +pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { + authorize_url: "https://claude.ai/oauth/authorize", + token_url: "https://platform.claude.com/v1/oauth/token", + callback: "http://localhost:53692/callback", + default_scopes: &[ + "org:create_api_key", + "user:profile", + "user:inference", + "user:sessions:claude_code", + "user:mcp_servers", + "user:file_upload", + ], +}; + +/// Look up a built-in `ProviderSpec` by config name. Returns `None` for +/// custom providers (handled by §6.3 once `OAuthConfig` grows the URL +/// fields) and for unknown names. +pub fn builtin(name: &str) -> Option { + match name { + "anthropic-mcp" => Some(ANTHROPIC_MCP), + _ => None, + } +} + +/// Resolve a server's `oauth:` block to a `ProviderSpec` plus the effective +/// scope list. `OAuthConfig::scopes`, when non-empty, replaces the spec's +/// defaults entirely — the caller never needs to merge. +/// +/// Custom providers (per ADR §6.3) require `OAuthConfig` to grow +/// `authorize_url` / `token_url` fields; until that lands, an `oauth:` +/// block without a known `provider` is an error. 
+pub fn resolve(cfg: &OAuthConfig) -> Result<(ProviderSpec, Vec)> { + let provider = cfg + .provider + .as_deref() + .ok_or_else(|| anyhow!("oauth.provider is required (custom providers land in §6.3)"))?; + let spec = builtin(provider) + .ok_or_else(|| anyhow!("unknown oauth provider {provider:?} (built-ins: anthropic-mcp)"))?; + let scopes = if cfg.scopes.is_empty() { + spec.default_scopes.iter().map(|s| s.to_string()).collect() + } else { + cfg.scopes.clone() + }; + Ok((spec, scopes)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn anthropic_mcp_spec_matches_adr_table() { + let spec = builtin("anthropic-mcp").expect("anthropic-mcp is built-in"); + assert_eq!(spec.authorize_url, "https://claude.ai/oauth/authorize"); + assert_eq!(spec.token_url, "https://platform.claude.com/v1/oauth/token"); + assert_eq!(spec.callback, "http://localhost:53692/callback"); + assert!(spec.default_scopes.contains(&"user:mcp_servers")); + } + + #[test] + fn unknown_provider_returns_none() { + assert!(builtin("does-not-exist").is_none()); + assert!(builtin("").is_none()); + } + + #[test] + fn resolve_uses_default_scopes_when_config_omits_them() { + let cfg = OAuthConfig { + provider: Some("anthropic-mcp".to_string()), + scopes: vec![], + }; + let (spec, scopes) = resolve(&cfg).unwrap(); + assert_eq!(spec, ANTHROPIC_MCP); + assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); + } + + #[test] + fn resolve_uses_config_scopes_when_provided() { + let cfg = OAuthConfig { + provider: Some("anthropic-mcp".to_string()), + scopes: vec!["user:profile".to_string(), "user:inference".to_string()], + }; + let (_, scopes) = resolve(&cfg).unwrap(); + assert_eq!(scopes, vec!["user:profile", "user:inference"]); + } + + #[test] + fn resolve_rejects_missing_provider() { + let cfg = OAuthConfig { + provider: None, + scopes: vec![], + }; + let err = resolve(&cfg).unwrap_err().to_string(); + assert!(err.contains("required"), "got: {err}"); + } + + #[test] + fn 
resolve_rejects_unknown_provider() { + let cfg = OAuthConfig { + provider: Some("github-copilot".to_string()), + scopes: vec![], + }; + let err = resolve(&cfg).unwrap_err().to_string(); + assert!(err.contains("unknown oauth provider"), "got: {err}"); + } +} From da86d5be85d3508ad716621137a24a3b0259ba64 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:55:31 +0000 Subject: [PATCH 36/98] =?UTF-8?q?feat(openab-agent/mcp):=20OAuthConfig=20?= =?UTF-8?q?=C2=A76.3=20fields=20+=20discovery=20boot=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends OAuthConfig with authorize_url / token_url / client_id / device_authorization_endpoint / discovery / discovery_allowlist so custom OAuth 2.1 providers can be declared inline. Adds validate() that rejects discovery=true without an explicit allowlist (RFC 8414 SSRF guard, ADR §6.3 / §6.4) and hooks it into load_layered. oauth.rs tests switch to ..Default::default() so future field additions don't churn the test struct literals. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 130 ++++++++++++++++++++++++++++++++- openab-agent/src/mcp/oauth.rs | 10 +-- 2 files changed, 130 insertions(+), 10 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 742459430..6509837a7 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -57,15 +57,46 @@ pub struct ToolFilter { pub exclude: Vec, } -/// OAuth block. Phase 1 only parses `provider` + `scopes`; custom-provider -/// fields (§6.3: `authorize_url`, `token_url`, `device_authorization_endpoint`, -/// `discovery`, `discovery_allowlist`) land with the Phase 2 auth slice. -#[derive(Debug, Clone, Serialize, Deserialize)] +/// OAuth block. +/// +/// `provider` selects a built-in spec from `oauth::builtin()`. Setting it +/// to an unknown name + supplying `authorize_url` / `token_url` defines a +/// custom OAuth 2.1 provider (ADR §6.3). 
`discovery: true` opts into +/// RFC 8414 dynamic discovery and requires a non-empty +/// `discovery_allowlist` of domains (§6.4 SSRF guard). +#[derive(Debug, Default, Clone, Serialize, Deserialize)] pub struct OAuthConfig { #[serde(default)] pub provider: Option, #[serde(default)] pub scopes: Vec, + #[serde(default)] + pub authorize_url: Option, + #[serde(default)] + pub token_url: Option, + #[serde(default)] + pub client_id: Option, + #[serde(default)] + pub device_authorization_endpoint: Option, + #[serde(default)] + pub discovery: bool, + #[serde(default)] + pub discovery_allowlist: Vec, +} + +impl OAuthConfig { + /// Boot-time validation (ADR §6.3 / §6.4). `discovery: true` without an + /// explicit allowlist is rejected — RFC 8414 lookups in multi-tenant + /// deployments would otherwise become an SSRF vector. + pub fn validate(&self, server: &str) -> Result<()> { + if self.discovery && self.discovery_allowlist.is_empty() { + return Err(anyhow!( + "mcp server {server:?}: oauth.discovery=true requires \ + a non-empty oauth.discovery_allowlist (ADR §6.3)" + )); + } + Ok(()) + } } impl McpConfig { @@ -89,9 +120,21 @@ impl McpConfig { let layer = Self::load_file(path)?; merged.servers.extend(layer.servers); } + merged.validate()?; Ok(merged) } + /// Validate every server's `oauth` block (ADR §6.3 boot check). Returns + /// the first failure — finer-grained per-server isolation lives in §5.6. + pub fn validate(&self) -> Result<()> { + for (name, server) in &self.servers { + if let ServerConfig::Http { oauth: Some(oauth), .. 
} = server { + oauth.validate(name)?; + } + } + Ok(()) + } + fn load_file(path: &Path) -> Result { let raw = std::fs::read_to_string(path) .with_context(|| format!("read mcp config {}", path.display()))?; @@ -287,4 +330,83 @@ mod tests { _ => unreachable!(), } } + + #[test] + fn parses_custom_oauth_provider_fields() { + let json = r#"{ + "mcpServers": { + "custom": { + "type": "http", + "url": "https://example.com/mcp", + "oauth": { + "provider": "custom", + "authorize_url": "https://example.com/oauth/authorize", + "token_url": "https://example.com/oauth/token", + "client_id": "abc123", + "device_authorization_endpoint": "https://example.com/oauth/device", + "discovery": true, + "discovery_allowlist": ["*.example.com"] + } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let ServerConfig::Http { oauth: Some(oauth), .. } = cfg.servers.get("custom").unwrap() + else { + panic!("expected http with oauth"); + }; + assert_eq!( + oauth.authorize_url.as_deref(), + Some("https://example.com/oauth/authorize"), + ); + assert_eq!( + oauth.token_url.as_deref(), + Some("https://example.com/oauth/token"), + ); + assert_eq!(oauth.client_id.as_deref(), Some("abc123")); + assert_eq!( + oauth.device_authorization_endpoint.as_deref(), + Some("https://example.com/oauth/device"), + ); + assert!(oauth.discovery); + assert_eq!(oauth.discovery_allowlist, vec!["*.example.com".to_string()]); + } + + #[test] + fn validate_rejects_discovery_without_allowlist() { + let oauth = OAuthConfig { + provider: Some("custom".into()), + discovery: true, + ..Default::default() + }; + let err = oauth.validate("srv").unwrap_err().to_string(); + assert!(err.contains("discovery_allowlist"), "got: {err}"); + assert!(err.contains("srv"), "got: {err}"); + } + + #[test] + fn validate_accepts_discovery_with_allowlist() { + let oauth = OAuthConfig { + provider: Some("custom".into()), + discovery: true, + discovery_allowlist: vec!["*.example.com".into()], + ..Default::default() + }; + 
oauth.validate("srv").unwrap(); + } + + #[test] + fn load_layered_rejects_invalid_discovery_config() { + let dir = tempfile::tempdir().unwrap(); + let project = dir.path().join("project.json"); + std::fs::write( + &project, + r#"{"mcpServers":{"bad":{"type":"http","url":"https://example.com","oauth":{"provider":"custom","discovery":true}}}}"#, + ) + .unwrap(); + let err = McpConfig::load_layered(None, Some(&project)) + .unwrap_err() + .to_string(); + assert!(err.contains("discovery_allowlist"), "got: {err}"); + } } diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index e05f7d77e..e31d9807b 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -100,7 +100,7 @@ mod tests { fn resolve_uses_default_scopes_when_config_omits_them() { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), - scopes: vec![], + ..Default::default() }; let (spec, scopes) = resolve(&cfg).unwrap(); assert_eq!(spec, ANTHROPIC_MCP); @@ -112,6 +112,7 @@ mod tests { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), scopes: vec!["user:profile".to_string(), "user:inference".to_string()], + ..Default::default() }; let (_, scopes) = resolve(&cfg).unwrap(); assert_eq!(scopes, vec!["user:profile", "user:inference"]); @@ -119,10 +120,7 @@ mod tests { #[test] fn resolve_rejects_missing_provider() { - let cfg = OAuthConfig { - provider: None, - scopes: vec![], - }; + let cfg = OAuthConfig::default(); let err = resolve(&cfg).unwrap_err().to_string(); assert!(err.contains("required"), "got: {err}"); } @@ -131,7 +129,7 @@ mod tests { fn resolve_rejects_unknown_provider() { let cfg = OAuthConfig { provider: Some("github-copilot".to_string()), - scopes: vec![], + ..Default::default() }; let err = resolve(&cfg).unwrap_err().to_string(); assert!(err.contains("unknown oauth provider"), "got: {err}"); From 7ea3e5931634b8b30ecdc96d0ae79b235a4f5b22 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:10:55 +0000 
Subject: [PATCH 37/98] fix(openab-agent/mcp): rustfmt struct-pattern nested-call multi-line Burned Tick 30: `if let ServerConfig::Http { oauth: Some(oauth), .. } = server` at 74 chars fmt-rejects because the nested `Some(oauth)` binding forces multi-line struct-pattern formatting regardless of total line width. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 6509837a7..003bb5310 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -128,7 +128,10 @@ impl McpConfig { /// the first failure — finer-grained per-server isolation lives in §5.6. pub fn validate(&self) -> Result<()> { for (name, server) in &self.servers { - if let ServerConfig::Http { oauth: Some(oauth), .. } = server { + if let ServerConfig::Http { + oauth: Some(oauth), .. + } = server + { oauth.validate(name)?; } } @@ -351,7 +354,9 @@ mod tests { } }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let ServerConfig::Http { oauth: Some(oauth), .. } = cfg.servers.get("custom").unwrap() + let ServerConfig::Http { + oauth: Some(oauth), .. + } = cfg.servers.get("custom").unwrap() else { panic!("expected http with oauth"); }; From 65f1c4d8a7faa86e2826c6535f57b28ecfe6a060 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:23:57 +0000 Subject: [PATCH 38/98] =?UTF-8?q?feat(openab-agent/mcp):=20resolve=20custo?= =?UTF-8?q?m=20OAuth=20providers=20(ADR=20=C2=A76.3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolve() now returns ResolvedProvider (owned strings) instead of (ProviderSpec, Vec<String>). Built-in providers fill it from their static spec; unknown providers fall through to the custom path, which requires authorize_url + token_url and propagates client_id / device_authorization_endpoint when supplied. 
callback is None for custom (§6.4 picks the port at login time). Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/oauth.rs | 167 ++++++++++++++++++++++++++-------- 1 file changed, 131 insertions(+), 36 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index e31d9807b..52d61a36f 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -1,11 +1,7 @@ -//! OAuth provider catalog (ADR §6.2). Wiring into the rmcp Streamable HTTP -//! transport + agent-guided flows (§6.4) lands in subsequent slices; this -//! module is the data layer the login / refresh code will dispatch through. -//! -//! Scopes are stored as `&'static [&'static str]` so callers can join them -//! with the space delimiter the OAuth 2.1 spec mandates without owning a -//! `Vec`. Per-server overrides (`OAuthConfig.scopes`) replace the defaults -//! and pay for a `Vec` at the boundary. +//! OAuth provider catalog (ADR §6.2) + custom-provider resolution (§6.3). +//! Wiring into the rmcp Streamable HTTP transport + agent-guided flows +//! (§6.4) lands in subsequent slices; this module is the data layer the +//! login / refresh code will dispatch through. // The §6.4 login slice is the first prod caller — until then, every item // here is reachable only via the unit tests below, so `cargo clippy @@ -17,9 +13,8 @@ use anyhow::{anyhow, Result}; use super::config::OAuthConfig; -/// Static description of a single OAuth provider — URLs + the loopback -/// redirect the §6.4 browser flow listens on. `default_scopes` is the -/// minimum set the agent will request when `oauth.scopes` is omitted +/// Static description of a single built-in OAuth provider. `default_scopes` +/// is the minimum set the agent will request when `oauth.scopes` is omitted /// from the server config; per-server overrides win when present. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ProviderSpec { @@ -46,8 +41,7 @@ pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { }; /// Look up a built-in `ProviderSpec` by config name. Returns `None` for -/// custom providers (handled by §6.3 once `OAuthConfig` grows the URL -/// fields) and for unknown names. +/// custom providers (§6.3) and for unknown names. pub fn builtin(name: &str) -> Option { match name { "anthropic-mcp" => Some(ANTHROPIC_MCP), @@ -55,26 +49,71 @@ pub fn builtin(name: &str) -> Option { } } -/// Resolve a server's `oauth:` block to a `ProviderSpec` plus the effective -/// scope list. `OAuthConfig::scopes`, when non-empty, replaces the spec's -/// defaults entirely — the caller never needs to merge. +/// Effective per-server OAuth parameters after resolving the built-in catalog +/// and `OAuthConfig` overrides. `callback` is `None` for custom providers +/// (§6.4 picks a free port at login time); built-ins pin theirs. `client_id` +/// is `None` for built-ins (the per-provider flow code in §6.4 owns it) and +/// optional for custom providers — OAuth 2.1 servers vary on whether public +/// clients must register. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ResolvedProvider { + pub authorize_url: String, + pub token_url: String, + pub client_id: Option, + pub callback: Option, + pub device_authorization_endpoint: Option, + pub scopes: Vec, +} + +/// Resolve a server's `oauth:` block. Built-in providers come from +/// `builtin()`; unknown providers fall through to the §6.3 custom path, +/// which requires `authorize_url` + `token_url` on the config. /// -/// Custom providers (per ADR §6.3) require `OAuthConfig` to grow -/// `authorize_url` / `token_url` fields; until that lands, an `oauth:` -/// block without a known `provider` is an error. 
-pub fn resolve(cfg: &OAuthConfig) -> Result<(ProviderSpec, Vec)> { +/// `OAuthConfig::scopes`, when non-empty, replaces the spec's defaults +/// entirely — the caller never needs to merge. +pub fn resolve(cfg: &OAuthConfig) -> Result { let provider = cfg .provider .as_deref() - .ok_or_else(|| anyhow!("oauth.provider is required (custom providers land in §6.3)"))?; - let spec = builtin(provider) - .ok_or_else(|| anyhow!("unknown oauth provider {provider:?} (built-ins: anthropic-mcp)"))?; + .ok_or_else(|| anyhow!("oauth.provider is required"))?; + if let Some(spec) = builtin(provider) { + Ok(resolve_builtin(spec, cfg)) + } else { + resolve_custom(provider, cfg) + } +} + +fn resolve_builtin(spec: ProviderSpec, cfg: &OAuthConfig) -> ResolvedProvider { let scopes = if cfg.scopes.is_empty() { spec.default_scopes.iter().map(|s| s.to_string()).collect() } else { cfg.scopes.clone() }; - Ok((spec, scopes)) + ResolvedProvider { + authorize_url: spec.authorize_url.to_string(), + token_url: spec.token_url.to_string(), + client_id: None, + callback: Some(spec.callback.to_string()), + device_authorization_endpoint: None, + scopes, + } +} + +fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result { + let authorize_url = cfg.authorize_url.clone().ok_or_else(|| { + anyhow!("custom oauth provider {provider:?}: oauth.authorize_url is required (ADR §6.3)") + })?; + let token_url = cfg.token_url.clone().ok_or_else(|| { + anyhow!("custom oauth provider {provider:?}: oauth.token_url is required (ADR §6.3)") + })?; + Ok(ResolvedProvider { + authorize_url, + token_url, + client_id: cfg.client_id.clone(), + callback: None, + device_authorization_endpoint: cfg.device_authorization_endpoint.clone(), + scopes: cfg.scopes.clone(), + }) } #[cfg(test)] @@ -97,41 +136,97 @@ mod tests { } #[test] - fn resolve_uses_default_scopes_when_config_omits_them() { + fn resolve_builtin_uses_default_scopes_when_config_omits_them() { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), 
..Default::default() }; - let (spec, scopes) = resolve(&cfg).unwrap(); - assert_eq!(spec, ANTHROPIC_MCP); - assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); + let r = resolve(&cfg).unwrap(); + assert_eq!(r.authorize_url, ANTHROPIC_MCP.authorize_url); + assert_eq!(r.callback.as_deref(), Some(ANTHROPIC_MCP.callback)); + assert_eq!(r.scopes.len(), ANTHROPIC_MCP.default_scopes.len()); + assert!(r.client_id.is_none()); + assert!(r.device_authorization_endpoint.is_none()); } #[test] - fn resolve_uses_config_scopes_when_provided() { + fn resolve_builtin_uses_config_scopes_when_provided() { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), scopes: vec!["user:profile".to_string(), "user:inference".to_string()], ..Default::default() }; - let (_, scopes) = resolve(&cfg).unwrap(); - assert_eq!(scopes, vec!["user:profile", "user:inference"]); + let r = resolve(&cfg).unwrap(); + assert_eq!(r.scopes, vec!["user:profile", "user:inference"]); } #[test] fn resolve_rejects_missing_provider() { - let cfg = OAuthConfig::default(); - let err = resolve(&cfg).unwrap_err().to_string(); + let err = resolve(&OAuthConfig::default()).unwrap_err().to_string(); assert!(err.contains("required"), "got: {err}"); } #[test] - fn resolve_rejects_unknown_provider() { + fn resolve_custom_uses_config_urls_and_propagates_device_endpoint() { + let cfg = OAuthConfig { + provider: Some("linear".to_string()), + authorize_url: Some("https://linear.app/oauth/authorize".to_string()), + token_url: Some("https://api.linear.app/oauth/token".to_string()), + client_id: Some("client-abc".to_string()), + device_authorization_endpoint: Some("https://linear.app/oauth/device".to_string()), + scopes: vec!["read".to_string(), "write".to_string()], + ..Default::default() + }; + let r = resolve(&cfg).unwrap(); + assert_eq!(r.authorize_url, "https://linear.app/oauth/authorize"); + assert_eq!(r.token_url, "https://api.linear.app/oauth/token"); + assert_eq!(r.client_id.as_deref(), 
Some("client-abc")); + assert_eq!( + r.device_authorization_endpoint.as_deref(), + Some("https://linear.app/oauth/device"), + ); + assert!( + r.callback.is_none(), + "custom providers defer callback to login-time port allocation", + ); + assert_eq!(r.scopes, vec!["read", "write"]); + } + + #[test] + fn resolve_custom_minimal_two_urls_only() { + let cfg = OAuthConfig { + provider: Some("acme".to_string()), + authorize_url: Some("https://acme.example/authorize".to_string()), + token_url: Some("https://acme.example/token".to_string()), + ..Default::default() + }; + let r = resolve(&cfg).unwrap(); + assert!(r.client_id.is_none()); + assert!(r.device_authorization_endpoint.is_none()); + assert!(r.callback.is_none()); + assert!(r.scopes.is_empty()); + } + + #[test] + fn resolve_custom_rejects_missing_authorize_url() { + let cfg = OAuthConfig { + provider: Some("custom".to_string()), + token_url: Some("https://example.com/token".to_string()), + ..Default::default() + }; + let err = resolve(&cfg).unwrap_err().to_string(); + assert!(err.contains("authorize_url"), "got: {err}"); + assert!(err.contains("custom"), "got: {err}"); + } + + #[test] + fn resolve_custom_rejects_missing_token_url() { let cfg = OAuthConfig { - provider: Some("github-copilot".to_string()), + provider: Some("custom".to_string()), + authorize_url: Some("https://example.com/authorize".to_string()), ..Default::default() }; let err = resolve(&cfg).unwrap_err().to_string(); - assert!(err.contains("unknown oauth provider"), "got: {err}"); + assert!(err.contains("token_url"), "got: {err}"); } } From 04b84f95f3b4cbccb8dadda7d64663d92297f9c2 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:34:14 +0000 Subject: [PATCH 39/98] refactor(openab-agent/mcp): ResolvedProvider as enum {Builtin, Custom} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Mira's Tick 32 review: encode "callback always pinned for built-ins, dynamic for custom" and "client_id 
owned by §6.4 for built-ins, from config for custom" as variants instead of convention-based Option fields. §6.4 caller gets exhaustive matching for free. ProviderSpec gains a `name: &'static str` field so the resolver can copy it into Builtin's provider_name without re-matching the catalog key. BUILTINS slice replaces the duplicated string-key match in builtin() — single source of truth. authorize_url() / token_url() / scopes() getters keep the call sites that don't care about the variant from drowning in `match`. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/oauth.rs | 157 +++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 51 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index 52d61a36f..c18ddddc6 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -18,6 +18,7 @@ use super::config::OAuthConfig; /// from the server config; per-server overrides win when present. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ProviderSpec { + pub name: &'static str, pub authorize_url: &'static str, pub token_url: &'static str, pub callback: &'static str, @@ -27,6 +28,7 @@ pub struct ProviderSpec { /// Anthropic MCP (claude.ai). Scope list from ADR §6.2 — `org:create_api_key` /// is the broadest grant; consumers should narrow via per-server overrides. pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { + name: "anthropic-mcp", authorize_url: "https://claude.ai/oauth/authorize", token_url: "https://platform.claude.com/v1/oauth/token", callback: "http://localhost:53692/callback", @@ -40,46 +42,86 @@ pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { ], }; +const BUILTINS: &[ProviderSpec] = &[ANTHROPIC_MCP]; + /// Look up a built-in `ProviderSpec` by config name. Returns `None` for /// custom providers (§6.3) and for unknown names. 
pub fn builtin(name: &str) -> Option { - match name { - "anthropic-mcp" => Some(ANTHROPIC_MCP), - _ => None, - } + BUILTINS.iter().copied().find(|spec| spec.name == name) } /// Effective per-server OAuth parameters after resolving the built-in catalog -/// and `OAuthConfig` overrides. `callback` is `None` for custom providers -/// (§6.4 picks a free port at login time); built-ins pin theirs. `client_id` -/// is `None` for built-ins (the per-provider flow code in §6.4 owns it) and -/// optional for custom providers — OAuth 2.1 servers vary on whether public -/// clients must register. +/// and `OAuthConfig` overrides. +/// +/// The two variants encode invariants that an `Option`-heavy struct couldn't: +/// built-ins always pin a `callback` (their PKCE port is hard-coded in the +/// provider's app registration) and never carry a `client_id` (the §6.4 flow +/// code owns it, mirroring `auth.rs::codex_client_id()`). Custom providers +/// flip both: §6.4 allocates a free port at login time, and `client_id` +/// comes from config (OAuth 2.1 public clients vary on registration). +/// +/// `device_authorization_endpoint` only appears on `Custom` — adding device +/// support for a built-in provider is a `ProviderSpec` schema change, not a +/// config flag. #[derive(Debug, Clone, PartialEq, Eq)] -pub struct ResolvedProvider { - pub authorize_url: String, - pub token_url: String, - pub client_id: Option, - pub callback: Option, - pub device_authorization_endpoint: Option, - pub scopes: Vec, +pub enum ResolvedProvider { + Builtin { + provider_name: &'static str, + authorize_url: &'static str, + token_url: &'static str, + callback: &'static str, + scopes: Vec, + }, + Custom { + provider_name: String, + authorize_url: String, + token_url: String, + client_id: Option, + device_authorization_endpoint: Option, + scopes: Vec, + }, +} + +impl ResolvedProvider { + /// Accessor for the shared `authorize_url` field. 
Callers that don't + /// need to distinguish built-in vs custom can skip the `match`. + pub fn authorize_url(&self) -> &str { + match self { + Self::Builtin { authorize_url, .. } => authorize_url, + Self::Custom { authorize_url, .. } => authorize_url, + } + } + + /// Accessor for the shared `token_url` field. + pub fn token_url(&self) -> &str { + match self { + Self::Builtin { token_url, .. } => token_url, + Self::Custom { token_url, .. } => token_url, + } + } + + /// Accessor for the shared scope list. + pub fn scopes(&self) -> &[String] { + match self { + Self::Builtin { scopes, .. } | Self::Custom { scopes, .. } => scopes, + } + } } /// Resolve a server's `oauth:` block. Built-in providers come from /// `builtin()`; unknown providers fall through to the §6.3 custom path, /// which requires `authorize_url` + `token_url` on the config. /// -/// `OAuthConfig::scopes`, when non-empty, replaces the spec's defaults +/// `OAuthConfig::scopes`, when non-empty, replaces the built-in defaults /// entirely — the caller never needs to merge. 
pub fn resolve(cfg: &OAuthConfig) -> Result { let provider = cfg .provider .as_deref() .ok_or_else(|| anyhow!("oauth.provider is required"))?; - if let Some(spec) = builtin(provider) { - Ok(resolve_builtin(spec, cfg)) - } else { - resolve_custom(provider, cfg) + match builtin(provider) { + Some(spec) => Ok(resolve_builtin(spec, cfg)), + None => resolve_custom(provider, cfg), } } @@ -89,12 +131,11 @@ fn resolve_builtin(spec: ProviderSpec, cfg: &OAuthConfig) -> ResolvedProvider { } else { cfg.scopes.clone() }; - ResolvedProvider { - authorize_url: spec.authorize_url.to_string(), - token_url: spec.token_url.to_string(), - client_id: None, - callback: Some(spec.callback.to_string()), - device_authorization_endpoint: None, + ResolvedProvider::Builtin { + provider_name: spec.name, + authorize_url: spec.authorize_url, + token_url: spec.token_url, + callback: spec.callback, scopes, } } @@ -106,11 +147,11 @@ fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result let token_url = cfg.token_url.clone().ok_or_else(|| { anyhow!("custom oauth provider {provider:?}: oauth.token_url is required (ADR §6.3)") })?; - Ok(ResolvedProvider { + Ok(ResolvedProvider::Custom { + provider_name: provider.to_string(), authorize_url, token_url, client_id: cfg.client_id.clone(), - callback: None, device_authorization_endpoint: cfg.device_authorization_endpoint.clone(), scopes: cfg.scopes.clone(), }) @@ -141,12 +182,15 @@ mod tests { provider: Some("anthropic-mcp".to_string()), ..Default::default() }; - let r = resolve(&cfg).unwrap(); - assert_eq!(r.authorize_url, ANTHROPIC_MCP.authorize_url); - assert_eq!(r.callback.as_deref(), Some(ANTHROPIC_MCP.callback)); - assert_eq!(r.scopes.len(), ANTHROPIC_MCP.default_scopes.len()); - assert!(r.client_id.is_none()); - assert!(r.device_authorization_endpoint.is_none()); + let ResolvedProvider::Builtin { + provider_name, callback, scopes, .. 
+ } = resolve(&cfg).unwrap() + else { + panic!("expected Builtin variant"); + }; + assert_eq!(provider_name, "anthropic-mcp"); + assert_eq!(callback, ANTHROPIC_MCP.callback); + assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); } #[test] @@ -157,7 +201,7 @@ mod tests { ..Default::default() }; let r = resolve(&cfg).unwrap(); - assert_eq!(r.scopes, vec!["user:profile", "user:inference"]); + assert_eq!(r.scopes(), &["user:profile", "user:inference"]); } #[test] @@ -177,19 +221,26 @@ mod tests { scopes: vec!["read".to_string(), "write".to_string()], ..Default::default() }; - let r = resolve(&cfg).unwrap(); - assert_eq!(r.authorize_url, "https://linear.app/oauth/authorize"); - assert_eq!(r.token_url, "https://api.linear.app/oauth/token"); - assert_eq!(r.client_id.as_deref(), Some("client-abc")); + let ResolvedProvider::Custom { + provider_name, + authorize_url, + token_url, + client_id, + device_authorization_endpoint, + scopes, + } = resolve(&cfg).unwrap() + else { + panic!("expected Custom variant"); + }; + assert_eq!(provider_name, "linear"); + assert_eq!(authorize_url, "https://linear.app/oauth/authorize"); + assert_eq!(token_url, "https://api.linear.app/oauth/token"); + assert_eq!(client_id.as_deref(), Some("client-abc")); assert_eq!( - r.device_authorization_endpoint.as_deref(), + device_authorization_endpoint.as_deref(), Some("https://linear.app/oauth/device"), ); - assert!( - r.callback.is_none(), - "custom providers defer callback to login-time port allocation", - ); - assert_eq!(r.scopes, vec!["read", "write"]); + assert_eq!(scopes, vec!["read", "write"]); } #[test] @@ -200,11 +251,15 @@ mod tests { token_url: Some("https://acme.example/token".to_string()), ..Default::default() }; - let r = resolve(&cfg).unwrap(); - assert!(r.client_id.is_none()); - assert!(r.device_authorization_endpoint.is_none()); - assert!(r.callback.is_none()); - assert!(r.scopes.is_empty()); + let ResolvedProvider::Custom { + client_id, device_authorization_endpoint, scopes, 
.. + } = resolve(&cfg).unwrap() + else { + panic!("expected Custom variant"); + }; + assert!(client_id.is_none()); + assert!(device_authorization_endpoint.is_none()); + assert!(scopes.is_empty()); } #[test] From f61b498398ee50778e0382dc35539ab878a6d13b Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:41:05 +0000 Subject: [PATCH 40/98] fix(openab-agent/mcp): rustfmt per-line struct-pattern binders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Burned Tick 33: rustfmt's struct-pattern formatting splits ≥3 simple binders onto their own lines, even when the body would fit inline. Different rule from the nested-call case (Tick 30) where the body stays inline. Runbook updated with both rules side-by-side. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/oauth.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index c18ddddc6..fa6cb2497 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -183,7 +183,10 @@ mod tests { ..Default::default() }; let ResolvedProvider::Builtin { - provider_name, callback, scopes, .. + provider_name, + callback, + scopes, + .. } = resolve(&cfg).unwrap() else { panic!("expected Builtin variant"); @@ -252,7 +255,10 @@ mod tests { ..Default::default() }; let ResolvedProvider::Custom { - client_id, device_authorization_endpoint, scopes, .. + client_id, + device_authorization_endpoint, + scopes, + .. 
} = resolve(&cfg).unwrap() else { panic!("expected Custom variant"); From 24001447f7485f95231c2795a0e04f79f3f15e79 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:00:28 +0000 Subject: [PATCH 41/98] =?UTF-8?q?feat(openab-agent/mcp):=20paste-back=20OA?= =?UTF-8?q?uth=20flow=20primitives=20(ADR=20=C2=A76.4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New mcp::flow module exposes init_paste_authorize(provider, client_id, redirect_uri) -> PasteAuthorize, which generates the PKCE pair + state nonce internally and returns the authorize URL plus the secrets the caller must persist for complete_login to validate the callback. Internalizing pair generation removes a footgun (caller can't mismatch verifier/state) and shrinks the API to the two parameters that actually vary per call. auth::generate_pkce promoted from private to pub so the MCP flow path can share it with Codex — security primitive, single source of truth, no drift on future hardening. Module-scope #![allow(dead_code)] consistent with mcp::oauth — first prod caller (the §6.4 login orchestration) lands in the next slice. Tests cover URL structure, percent-encoding of redirect_uri, scope form-encoding, unparseable authorize_url error path, and custom-provider URL composition. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 2 +- openab-agent/src/mcp/flow.rs | 146 +++++++++++++++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 1 + 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 openab-agent/src/mcp/flow.rs diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index b83179e15..ec3a49ca0 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -227,7 +227,7 @@ async fn refresh_token(store: &TokenStore) -> Result { }) } -fn generate_pkce() -> (String, String) { +pub fn generate_pkce() -> (String, String) { let mut buf = [0u8; 32]; getrandom::fill(&mut buf).expect("getrandom failed"); let verifier = URL_SAFE_NO_PAD.encode(buf); diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs new file mode 100644 index 000000000..17b34b5ce --- /dev/null +++ b/openab-agent/src/mcp/flow.rs @@ -0,0 +1,146 @@ +//! OAuth 2.1 paste-back flow primitives (ADR §6.4). PKCE comes from +//! `crate::auth::generate_pkce` — shared with the Codex paths so a +//! security-primitive change can't drift between modules. Orchestration +//! (device polling, callback parsing) lands in subsequent slices. + +// First prod caller (§6.4 login orchestration) lands in the next slice; +// until then every item is reachable only via tests, so +// `clippy --features mcp -D warnings` would flag dead_code. +#![allow(dead_code)] + +use anyhow::Result; +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; +use url::Url; + +use crate::auth::generate_pkce; +use super::oauth::ResolvedProvider; + +/// 16-byte URL-safe `state` nonce for the OAuth authorize URL. 
+fn generate_state() -> String { + let mut buf = [0u8; 16]; + getrandom::fill(&mut buf).expect("getrandom failed"); + URL_SAFE_NO_PAD.encode(buf) +} + +/// Result of `init_paste_authorize`: the URL to surface to the user, plus +/// the `code_verifier` + `state` the caller must persist under the +/// pending-login key for `complete_login` to validate the callback. +pub struct PasteAuthorize { + pub url: String, + pub code_verifier: String, + pub state: String, +} + +/// Start a paste-back OAuth 2.1 authorize flow. Generates the PKCE pair +/// and state nonce internally so the caller can't pair them up wrong; +/// builds the RFC 6749 authorize URL with `S256` PKCE and space-joined +/// scopes. `client_id` is caller-supplied: built-ins look it up via a +/// hard-coded helper (mirroring `auth::codex_client_id`); custom +/// providers carry it on `ResolvedProvider::Custom`. `redirect_uri` is +/// the provider's pinned callback for built-ins or a runtime-bound +/// `localhost:<port>` for custom paste-back flows. 
+pub fn init_paste_authorize( + provider: &ResolvedProvider, + client_id: &str, + redirect_uri: &str, +) -> Result { + let (code_verifier, code_challenge) = generate_pkce(); + let state = generate_state(); + let mut url = Url::parse(provider.authorize_url())?; + url.query_pairs_mut() + .append_pair("response_type", "code") + .append_pair("client_id", client_id) + .append_pair("redirect_uri", redirect_uri) + .append_pair("code_challenge", &code_challenge) + .append_pair("code_challenge_method", "S256") + .append_pair("state", &state) + .append_pair("scope", &provider.scopes().join(" ")); + Ok(PasteAuthorize { + url: url.to_string(), + code_verifier, + state, + }) +} + +#[cfg(test)] +mod tests { + use crate::mcp::config::OAuthConfig; + use crate::mcp::oauth::resolve; + use super::*; + + const TEST_REDIRECT: &str = "http://localhost:53692/callback"; + + #[test] + fn state_is_url_safe_and_unique() { + let s = generate_state(); + let url_safe = s + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_'); + assert!(url_safe); + assert_ne!(s, generate_state()); + } + + fn builtin_provider() -> ResolvedProvider { + let cfg = OAuthConfig { + provider: Some("anthropic-mcp".to_string()), + ..Default::default() + }; + resolve(&cfg).unwrap() + } + + #[test] + fn init_paste_authorize_threads_pkce_and_state_into_url() { + let p = builtin_provider(); + let r = init_paste_authorize(&p, "client-xyz", TEST_REDIRECT).unwrap(); + assert!(r.url.starts_with("https://claude.ai/oauth/authorize?")); + assert!(r.url.contains("response_type=code")); + assert!(r.url.contains("client_id=client-xyz")); + assert!(r.url.contains("code_challenge_method=S256")); + assert!(r.url.contains(&format!("state={}", r.state))); + assert!(!r.code_verifier.is_empty()); + } + + #[test] + fn init_paste_authorize_percent_encodes_redirect_uri() { + let p = builtin_provider(); + let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); + let want = 
"redirect_uri=http%3A%2F%2Flocalhost%3A53692%2Fcallback"; + assert!(r.url.contains(want)); + } + + #[test] + fn init_paste_authorize_form_encodes_scope_spaces_as_plus() { + let p = builtin_provider(); + let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); + assert!(r.url.contains("scope=org%3Acreate_api_key")); + assert!(r.url.contains("user%3Amcp_servers")); + } + + #[test] + fn init_paste_authorize_rejects_unparseable_authorize_url() { + let cfg = OAuthConfig { + provider: Some("broken".to_string()), + authorize_url: Some("not a url".to_string()), + token_url: Some("https://example.com/token".to_string()), + ..Default::default() + }; + let p = resolve(&cfg).unwrap(); + assert!(init_paste_authorize(&p, "c", TEST_REDIRECT).is_err()); + } + + #[test] + fn init_paste_authorize_for_custom_provider() { + let cfg = OAuthConfig { + provider: Some("linear".to_string()), + authorize_url: Some("https://linear.app/oauth/authorize".to_string()), + token_url: Some("https://api.linear.app/oauth/token".to_string()), + client_id: Some("linear-client".to_string()), + scopes: vec!["read".to_string(), "write".to_string()], + ..Default::default() + }; + let p = resolve(&cfg).unwrap(); + let r = init_paste_authorize(&p, "linear-client", TEST_REDIRECT).unwrap(); + assert!(r.url.starts_with("https://linear.app/oauth/authorize?")); + assert!(r.url.contains("scope=read+write")); + } +} diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 17884c9c1..81278aa4e 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,6 +1,7 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. 
pub mod config; +pub mod flow; pub mod meta_tool; pub mod oauth; pub mod runtime; From 7f4ef17a233ab6d34ee5248d1d5dc444857dedf7 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:11:04 +0000 Subject: [PATCH 42/98] fix(openab-agent/mcp): rustfmt import precedence super < crate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rustfmt's reorder_imports does NOT sort by pure alphabetical order across use statements — local path roots have a fixed precedence: self < super < crate < external crates. My alphabetical assumption (c < s, so crate first) was wrong in both flow.rs's module-scope imports and its mod tests block. Burned Tick 36 (twice, same file). Runbook updated alongside the Tick 14 sub-module-vs-bare ordering note since the two rules apply at different scopes. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/flow.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index 17b34b5ce..caac9d340 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -12,8 +12,8 @@ use anyhow::Result; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use url::Url; -use crate::auth::generate_pkce; use super::oauth::ResolvedProvider; +use crate::auth::generate_pkce; /// 16-byte URL-safe `state` nonce for the OAuth authorize URL. 
fn generate_state() -> String { @@ -64,9 +64,9 @@ pub fn init_paste_authorize( #[cfg(test)] mod tests { + use super::*; use crate::mcp::config::OAuthConfig; use crate::mcp::oauth::resolve; - use super::*; const TEST_REDIRECT: &str = "http://localhost:53692/callback"; From 95f33bc2d51487330df4aef5fca33576b157d302 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:25:40 +0000 Subject: [PATCH 43/98] feat(openab-agent/mcp): NeedsAuth state for oauth-protected http servers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ServerStatus gains a NeedsAuth variant; McpRuntimeManager::connect() now transitions oauth-protected http servers into that state with an error pointing the user at `mcp login <server>` instead of staying silently Disconnected. Icon "◌" (U+25CC DOTTED CIRCLE) matches the existing geometric family (○ ◐ ●) rather than "⚠" — the latter is emoji-prone (Discord and many terminals upgrade it via VS16), which would break aligned status output. Status label "needs_auth" wired through meta_tool's snake_case status_label() table. Tests cover both the transition + error format and an idempotency guarantee: a second connect() on a NeedsAuth server must keep the state sticky (only successful `mcp login` clears it). 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 1 + openab-agent/src/mcp/runtime.rs | 58 +++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 557badf4c..7ecfe0034 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -186,6 +186,7 @@ fn status_label(status: &ServerStatus) -> &'static str { ServerStatus::Disconnected => "disconnected", ServerStatus::Connecting => "connecting", ServerStatus::Connected => "connected", + ServerStatus::NeedsAuth => "needs_auth", ServerStatus::Failed(_) => "failed", } } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 933fd23d5..3e00dfba0 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -24,12 +24,12 @@ use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -#[allow(dead_code)] // NeedsAuth lands with the Phase 2 OAuth slice (ADR §5.7) #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { Disconnected, Connecting, Connected, + NeedsAuth, Failed(String), } @@ -39,6 +39,7 @@ impl ServerStatus { ServerStatus::Disconnected => "○", ServerStatus::Connecting => "◐", ServerStatus::Connected => "●", + ServerStatus::NeedsAuth => "◌", ServerStatus::Failed(_) => "✗", } } @@ -147,8 +148,10 @@ impl McpRuntimeManager { } /// Lazy-connect the named server (ADR §5.7). Idempotent if already - /// `Connected` with a live client. HTTP servers requiring OAuth are - /// rejected until the Phase 2 auth slice lands (ADR §6). + /// `Connected` with a live client. HTTP servers with an `oauth:` block + /// are routed through `mcp login` first — `connect` marks them + /// `NeedsAuth` and returns an error pointing the caller at the login + /// subcommand rather than attempting an unauthenticated dial. 
pub async fn connect(&self, name: &str) -> Result<()> { let dial = { let mut guard = self.handles.write().await; @@ -163,17 +166,16 @@ impl McpRuntimeManager { ServerConfig::Stdio { command, args, env, .. } => Dial::Stdio { command, args, env }, - // Reject oauth-protected servers BEFORE the `Connecting` - // transition: we never attempted a handshake, so leaving - // status at `Disconnected` is the honest state. Status - // becomes `Failed` only when a dial was actually tried. - ServerConfig::Http { - oauth: Some(_), - url, - .. - } => { + // Oauth-protected servers can't be dialed via plain connect; + // mark `NeedsAuth` so `mcp status` shows a persistent + // "waiting for login" signal (vs `Disconnected`, which + // implies a plain `connect` would succeed). The `Failed` + // path remains reserved for dials that were attempted and + // failed at handshake. + ServerConfig::Http { oauth: Some(_), .. } => { + handle.status = ServerStatus::NeedsAuth; return Err(anyhow!( - "oauth-protected http server {url:?} requires the auth slice (Phase 2 §6)" + "mcp server {name:?} needs oauth login — run `mcp login {name}`" )); } ServerConfig::Http { url, .. } => Dial::Http { url }, @@ -298,7 +300,7 @@ mod tests { } #[tokio::test] - async fn connect_http_with_oauth_defers_to_auth_slice() { + async fn connect_http_with_oauth_marks_needs_auth() { let json = r#"{ "mcpServers": { "linear": { @@ -311,10 +313,30 @@ mod tests { let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); - assert!(err.contains("oauth"), "expected 'oauth' in {err}"); - // OAuth rejection happens BEFORE the Connecting transition, so the - // server remains Disconnected — no dial was attempted. 
- assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); + assert!(err.contains("needs oauth login"), "expected hint in {err}"); + assert!(err.contains("mcp login"), "expected 'mcp login' hint in {err}"); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + } + + #[tokio::test] + async fn connect_oauth_twice_keeps_needs_auth_sticky() { + // Second connect() must NOT silently re-enter `Connecting` and + // shadow the user-actionable state — the only path out of + // `NeedsAuth` is a successful `mcp login`. + let json = r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + assert!(mgr.connect("linear").await.is_err()); + assert!(mgr.connect("linear").await.is_err()); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); } #[tokio::test] From 32232c0e42ed1bde4a51b6ca7c6ee6973efc5acd Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:30:36 +0000 Subject: [PATCH 44/98] style(openab-agent/mcp): split assert! args to satisfy rustfmt fn_call_width Tick 37's assert!(err.contains("mcp login"), "...") was 84 chars inline but rustfmt's default fn_call_width=60 measures the arg list and split it. Match the formatter. 
--- openab-agent/src/mcp/runtime.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 3e00dfba0..e1534cefa 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -314,7 +314,10 @@ mod tests { let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); assert!(err.contains("needs oauth login"), "expected hint in {err}"); - assert!(err.contains("mcp login"), "expected 'mcp login' hint in {err}"); + assert!( + err.contains("mcp login"), + "expected 'mcp login' hint in {err}" + ); assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); } From af3a25ddd70bc5175b89dc190b0214e05441a547 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:49:05 +0000 Subject: [PATCH 45/98] feat(openab-agent/mcp): start_paste_login + builtin client_id resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit McpRuntimeManager::start_paste_login(server) wires flow::init_paste_authorize for built-in OAuth providers (ADR §6.4). The PKCE verifier + state are stashed in an in-memory pending_logins map (HashMap<String, PendingPasteLogin>) for the next slice's complete_login to consume. Server status flips to NeedsAuth. Scope this slice: - Built-in providers only (anthropic-mcp). Custom-provider paste-back needs runtime callback port allocation; deferred to a follow-up slice. - Custom providers declaring device_authorization_endpoint short-circuit with an explicit "use device flow" error (ADR §6.4 selection logic). - ADR §6.4 says transient state lives "in TokenStore"; this slice keeps it in-process. auth.json needs a heterogeneous-entry schema change to hold non-token shapes — separate slice. oauth::builtin_client_id is the per-provider client_id resolver — env-var-required (no hard-coded default) so paste-back fails loud rather than emitting an authorize URL with a placeholder client_id. 
flow.rs sheds its module-level #![allow(dead_code)] now that init_paste_authorize has a prod caller transitively from start_paste_login (itself allow-dead-code until the next slice wires the mcp::login action). --- openab-agent/src/mcp/flow.rs | 5 - openab-agent/src/mcp/oauth.rs | 60 ++++++++ openab-agent/src/mcp/runtime.rs | 251 ++++++++++++++++++++++++++++++++ 3 files changed, 311 insertions(+), 5 deletions(-) diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index caac9d340..39ed8b13c 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -3,11 +3,6 @@ //! security-primitive change can't drift between modules. Orchestration //! (device polling, callback parsing) lands in subsequent slices. -// First prod caller (§6.4 login orchestration) lands in the next slice; -// until then every item is reachable only via tests, so -// `clippy --features mcp -D warnings` would flag dead_code. -#![allow(dead_code)] - use anyhow::Result; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use url::Url; diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index fa6cb2497..6d75d7952 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -50,6 +50,29 @@ pub fn builtin(name: &str) -> Option { BUILTINS.iter().copied().find(|spec| spec.name == name) } +/// Resolve a built-in provider's OAuth `client_id`. Mirrors +/// `auth::codex_client_id`'s env-var-override pattern but without a hard- +/// coded default — the Anthropic MCP public client_id isn't yet pinned in +/// this repo, so requiring the env var fails fast with a useful error +/// rather than silently dialing with a placeholder. Replace with a +/// hard-coded default once a real value is published. 
+pub fn builtin_client_id(provider: &str) -> Result { + let env_var = match provider { + "anthropic-mcp" => "OPENAB_MCP_ANTHROPIC_CLIENT_ID", + other => { + return Err(anyhow!( + "no built-in client_id mapping for provider {other:?}" + )); + } + }; + std::env::var(env_var).map_err(|_| { + anyhow!( + "built-in provider {provider:?} requires env var {env_var} \ + (client_id of the provider's OAuth app)" + ) + }) +} + /// Effective per-server OAuth parameters after resolving the built-in catalog /// and `OAuthConfig` overrides. /// @@ -161,6 +184,43 @@ fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result mod tests { use super::*; + // Both env-touching tests below race the same OS env var; serialize + // them per the runbook's Tick 24 lesson (acp.rs ANTHROPIC_API_KEY race). + static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + #[test] + fn builtin_client_id_requires_env_var() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // SAFETY: serialized via ENV_LOCK; isolated env key. + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + let err = builtin_client_id("anthropic-mcp") + .unwrap_err() + .to_string(); + assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); + } + + #[test] + fn builtin_client_id_uses_env_var_when_set() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // SAFETY: serialized via ENV_LOCK; isolated env key. 
+ unsafe { + std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-test-id"); + } + let id = builtin_client_id("anthropic-mcp").unwrap(); + assert_eq!(id, "anth-test-id"); + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + } + + #[test] + fn builtin_client_id_rejects_unknown_provider() { + let err = builtin_client_id("does-not-exist").unwrap_err().to_string(); + assert!(err.contains("does-not-exist"), "got: {err}"); + } + #[test] fn anthropic_mcp_spec_matches_adr_table() { let spec = builtin("anthropic-mcp").expect("anthropic-mcp is built-in"); diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index e1534cefa..d87115e72 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -23,6 +23,8 @@ use tokio::process::Command; use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; +use super::flow::init_paste_authorize; +use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { @@ -67,11 +69,39 @@ impl std::fmt::Debug for ServerHandle { } } +/// Transient per-server state captured at `start_paste_login` and consumed +/// by `complete_login` (next slice). `token_url` + `provider_name` are +/// snapshotted up front so a config edit between the two calls can't +/// silently redirect the token exchange. +/// +/// ADR §6.4 says this lives "in TokenStore"; this slice keeps it in +/// process memory only — `auth.json` would need a heterogeneous-entry +/// schema change to hold non-token shapes, deferred to its own slice. +#[derive(Debug, Clone)] +#[allow(dead_code)] // wired in next slice (complete_login) +pub struct PendingPasteLogin { + pub verifier: String, + pub state: String, + pub token_url: String, + pub provider_name: String, +} + +/// Public return of `start_paste_login`. 
The caller relays `authorize_url` +/// to the user; `state` is echoed so the agent can show / log it without +/// reaching into runtime internals. +#[derive(Debug, Clone)] +#[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) +pub struct PasteLoginStart { + pub authorize_url: String, + pub state: String, +} + /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. #[derive(Debug, Default, Clone)] pub struct McpRuntimeManager { handles: Arc>>, + pending_logins: Arc>>, } impl McpRuntimeManager { @@ -91,6 +121,7 @@ impl McpRuntimeManager { .collect(); Self { handles: Arc::new(RwLock::new(handles)), + pending_logins: Arc::new(RwLock::new(HashMap::new())), } } @@ -147,6 +178,86 @@ impl McpRuntimeManager { out } + /// Begin a paste-back OAuth login for an HTTP server with an `oauth:` + /// block (ADR §6.4). Produces the authorize URL the agent surfaces to + /// the user; the matching PKCE verifier + `state` nonce are kept on + /// `self.pending_logins` for `complete_login` (next slice) to consume. + /// + /// Scoped to **built-in** providers this slice. Custom-provider + /// paste-back needs runtime port allocation for the callback (§6.4), + /// and any provider that advertises a `device_authorization_endpoint` + /// should run device-code instead (§6.4 selection logic). Both errors + /// are explicit so the LLM can pick a different action. + #[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) + pub async fn start_paste_login(&self, name: &str) -> Result { + let oauth_cfg = { + let guard = self.handles.read().await; + let handle = guard + .get(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + match handle.config.resolved(name)? { + ServerConfig::Http { + oauth: Some(oauth), .. + } => oauth, + ServerConfig::Http { oauth: None, .. 
} => { + return Err(anyhow!("mcp server {name:?} has no oauth block")); + } + ServerConfig::Stdio { .. } => { + return Err(anyhow!("mcp server {name:?} is stdio, not http+oauth")); + } + } + }; + + let provider = resolve(&oauth_cfg)?; + let (client_id, redirect_uri) = match &provider { + ResolvedProvider::Builtin { + provider_name, callback, .. + } => (builtin_client_id(provider_name)?, (*callback).to_string()), + ResolvedProvider::Custom { + device_authorization_endpoint: Some(_), .. + } => { + return Err(anyhow!( + "mcp server {name:?} has a device endpoint; use device flow" + )); + } + ResolvedProvider::Custom { .. } => { + return Err(anyhow!( + "mcp server {name:?}: custom-provider paste-back not yet supported" + )); + } + }; + + let started = init_paste_authorize(&provider, &client_id, &redirect_uri)?; + let pending = PendingPasteLogin { + verifier: started.code_verifier, + state: started.state.clone(), + token_url: provider.token_url().to_string(), + provider_name: provider_name_of(&provider), + }; + { + let mut handles = self.handles.write().await; + if let Some(handle) = handles.get_mut(name) { + handle.status = ServerStatus::NeedsAuth; + } + } + self.pending_logins + .write() + .await + .insert(name.to_string(), pending); + Ok(PasteLoginStart { + authorize_url: started.url, + state: started.state, + }) + } + + /// Borrow the in-flight pending paste-login for `name`. Returns a + /// clone so callers don't hold the lock; `complete_login` (next + /// slice) is the intended consumer. + #[allow(dead_code)] // first prod caller is complete_login in next slice + pub async fn pending_paste_login(&self, name: &str) -> Option { + self.pending_logins.read().await.get(name).cloned() + } + /// Lazy-connect the named server (ADR §5.7). Idempotent if already /// `Connected` with a live client. 
HTTP servers with an `oauth:` block /// are routed through `mcp login` first — `connect` marks them @@ -211,6 +322,15 @@ impl McpRuntimeManager { } } +/// Stringified provider name for the pending-state record. `Builtin` keeps +/// its `&'static str` static; `Custom` already owns a `String`. +fn provider_name_of(provider: &ResolvedProvider) -> String { + match provider { + ResolvedProvider::Builtin { provider_name, .. } => (*provider_name).to_string(), + ResolvedProvider::Custom { provider_name, .. } => provider_name.clone(), + } +} + /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. @@ -362,6 +482,137 @@ mod tests { } } + // start_paste_login + builtin_client_id race on the same env var. + // Same fix as oauth.rs / acp.rs (Tick 24 lesson). + static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + fn linear_custom_cfg() -> &'static str { + r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { + "provider": "linear", + "authorize_url": "https://linear.app/oauth/authorize", + "token_url": "https://api.linear.app/oauth/token", + "client_id": "linear-client", + "scopes": ["read"] + } + } + } + }"# + } + + fn anthropic_builtin_cfg() -> &'static str { + r#"{ + "mcpServers": { + "anthro": { + "type": "http", + "url": "https://example.com/mcp", + "oauth": { "provider": "anthropic-mcp" } + } + } + }"# + } + + async fn start_login_err(mgr: &McpRuntimeManager, name: &str) -> String { + mgr.start_paste_login(name) + .await + .unwrap_err() + .to_string() + } + + #[tokio::test] + async fn start_paste_login_builtin_returns_authorize_url_and_pins_pending() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // SAFETY: serialized via ENV_LOCK; isolated env key. 
+ unsafe { + std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-cid"); + } + let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let start = mgr.start_paste_login("anthro").await.unwrap(); + assert!(start.authorize_url.starts_with("https://claude.ai/oauth/authorize?")); + assert!(start.authorize_url.contains("client_id=anth-cid")); + assert!(start.authorize_url.contains(&format!("state={}", start.state))); + let pending = mgr.pending_paste_login("anthro").await.unwrap(); + assert_eq!(pending.state, start.state); + assert!(!pending.verifier.is_empty()); + assert_eq!( + pending.token_url, + "https://platform.claude.com/v1/oauth/token" + ); + assert_eq!(pending.provider_name, "anthropic-mcp"); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + } + + #[tokio::test] + async fn start_paste_login_rejects_custom_provider_for_now() { + let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "linear").await; + assert!(err.contains("custom-provider"), "got: {err}"); + assert!(mgr.pending_paste_login("linear").await.is_none()); + } + + #[tokio::test] + async fn start_paste_login_rejects_custom_with_device_endpoint() { + let json = r#"{ + "mcpServers": { + "dev": { + "type": "http", + "url": "https://example.com/mcp", + "oauth": { + "provider": "dev", + "authorize_url": "https://example.com/oauth/authorize", + "token_url": "https://example.com/oauth/token", + "device_authorization_endpoint": "https://example.com/oauth/device" + } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "dev").await; + assert!(err.contains("device flow"), "got: {err}"); + } + + #[tokio::test] + async fn 
start_paste_login_rejects_stdio_server() { + let json = r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "fs").await; + assert!(err.contains("stdio"), "got: {err}"); + } + + #[tokio::test] + async fn start_paste_login_unknown_server_errors() { + let mgr = McpRuntimeManager::from_config(McpConfig::default()); + let err = start_login_err(&mgr, "ghost").await; + assert!(err.contains("ghost"), "got: {err}"); + } + + #[tokio::test] + async fn start_paste_login_builtin_without_env_var_errors_loud() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "anthro").await; + assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); + } + #[tokio::test] async fn connect_to_missing_binary_records_failed() { let json = r#"{ From f0de2f29235a560687429b910010dc66bbc39705 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:51:23 +0000 Subject: [PATCH 46/98] style(openab-agent/mcp): satisfy rustfmt for Tick 39 slice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three distinct fmt rule misses on the start_paste_login slice: - oauth.rs: chain at receiver+chain = exactly 60 chars stays inline (over-broken on the unwrap_err+to_string chain) - runtime.rs: struct-pattern binders force per-line when arm body is also long enough that the inline form would overflow — different threshold than the existing connect()'s Stdio arm - runtime.rs: field-access chain ELEMENT counts toward chain_width; `start.authorize_url.starts_with(...)` is 2 chain elements, not 1, so receiver+chain over 60 → 
break --- openab-agent/src/mcp/oauth.rs | 4 +--- openab-agent/src/mcp/runtime.rs | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index 6d75d7952..f3ea31661 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -195,9 +195,7 @@ mod tests { unsafe { std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); } - let err = builtin_client_id("anthropic-mcp") - .unwrap_err() - .to_string(); + let err = builtin_client_id("anthropic-mcp").unwrap_err().to_string(); assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index d87115e72..d363bf487 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -211,10 +211,13 @@ impl McpRuntimeManager { let provider = resolve(&oauth_cfg)?; let (client_id, redirect_uri) = match &provider { ResolvedProvider::Builtin { - provider_name, callback, .. + provider_name, + callback, + .. } => (builtin_client_id(provider_name)?, (*callback).to_string()), ResolvedProvider::Custom { - device_authorization_endpoint: Some(_), .. + device_authorization_endpoint: Some(_), + .. 
} => { return Err(anyhow!( "mcp server {name:?} has a device endpoint; use device flow" @@ -517,10 +520,7 @@ mod tests { } async fn start_login_err(mgr: &McpRuntimeManager, name: &str) -> String { - mgr.start_paste_login(name) - .await - .unwrap_err() - .to_string() + mgr.start_paste_login(name).await.unwrap_err().to_string() } #[tokio::test] @@ -533,9 +533,13 @@ mod tests { let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let start = mgr.start_paste_login("anthro").await.unwrap(); - assert!(start.authorize_url.starts_with("https://claude.ai/oauth/authorize?")); + assert!(start + .authorize_url + .starts_with("https://claude.ai/oauth/authorize?")); assert!(start.authorize_url.contains("client_id=anth-cid")); - assert!(start.authorize_url.contains(&format!("state={}", start.state))); + assert!(start + .authorize_url + .contains(&format!("state={}", start.state))); let pending = mgr.pending_paste_login("anthro").await.unwrap(); assert_eq!(pending.state, start.state); assert!(!pending.verifier.is_empty()); From 7dd718c5066021fa4f8feea0f65289e7285f97aa Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:11:12 +0000 Subject: [PATCH 47/98] feat(openab-agent): split auth.json into TokenStore | PendingPasteLogin Untagged Serde enum `AuthEntry` keeps refresh-task state machine separate from in-flight paste-login state. Per Mira's Tick 39 review: repurposing TokenStore fields for pending entries would have made refresh loop on them. Adds `{load,save,remove}_pending_login` helpers (mcp-gated, wired in next slice via runtime::start_paste_login). 
--- openab-agent/src/auth.rs | 203 +++++++++++++++++++++++++++++++++++---- 1 file changed, 183 insertions(+), 20 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index ec3a49ca0..076b56ea1 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -39,6 +39,38 @@ pub struct TokenStore { pub provider: String, } +/// Transient per-server state captured at `start_paste_login` and consumed +/// by `complete_login` (ADR §6.4). Lives in `auth.json` under +/// `mcp-pending:`. `token_url` + `provider_name` are snapshotted +/// up front so a config edit between init and finish can't redirect the +/// token exchange. +/// +/// Unconditionally compiled (not behind `mcp` feature) so a non-mcp build +/// can still parse + round-trip an `auth.json` containing pending entries. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PendingPasteLogin { + pub verifier: String, + pub state: String, + pub token_url: String, + pub provider_name: String, +} + +/// `auth.json` value type. Untagged Serde enum: `TokenStore` has required +/// `access_token`, `PendingPasteLogin` has required `verifier` — the +/// shapes are disjoint, so deserialization picks the right variant +/// without an explicit tag (and existing files stay byte-compatible). +/// +/// Per Mira's Tick 39 review: option-A (repurposing TokenStore fields for +/// pending state) would have made the refresh task treat pending entries +/// as "expired tokens" and loop on them. The untagged enum keeps the two +/// state machines completely separate. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum AuthEntry { + Token(TokenStore), + Pending(PendingPasteLogin), +} + fn auth_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home) @@ -54,7 +86,7 @@ fn auth_path() -> PathBuf { /// Discriminates by the top-level `access_token` key — present means the /// file is the legacy `TokenStore` shape, absent means the new namespaced /// map. A single JSON parse gives accurate error context either way. -fn read_auth_file(path: &Path) -> Result> { +fn read_auth_file(path: &Path) -> Result> { let data = std::fs::read_to_string(path)?; let value: serde_json::Value = serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}"))?; @@ -62,7 +94,7 @@ fn read_auth_file(path: &Path) -> Result> { let legacy: TokenStore = serde_json::from_value(value) .map_err(|e| anyhow!("Invalid auth.json (legacy format): {e}"))?; let mut map = HashMap::new(); - map.insert(CODEX_NAMESPACE.to_string(), legacy); + map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(legacy)); return Ok(map); } serde_json::from_value(value).map_err(|e| anyhow!("Invalid auth.json: {e}")) @@ -72,7 +104,7 @@ fn read_auth_file(path: &Path) -> Result> { /// satisfies the ADR §6.1 refresh-token rotation contract — without it, a /// Spot interruption between local write and S3 sync would restore a /// revoked refresh token from durable storage on the next task start. -fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { +fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { if let Some(dir) = path.parent() { std::fs::create_dir_all(dir)?; } @@ -106,18 +138,19 @@ pub fn load_tokens() -> Result { path.display() ) })?; - map.get(CODEX_NAMESPACE).cloned().ok_or_else(|| { - anyhow!( + match map.get(CODEX_NAMESPACE) { + Some(AuthEntry::Token(t)) => Ok(t.clone()), + _ => Err(anyhow!( "No codex credentials in {}. 
Run `openab-agent auth codex-oauth` first.", path.display() - ) - }) + )), + } } fn save_tokens(store: &TokenStore) -> Result<()> { let path = auth_path(); let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(CODEX_NAMESPACE.to_string(), store.clone()); + map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(store.clone())); write_auth_file(&path, &map) } @@ -130,9 +163,11 @@ pub fn load_namespaced_token(key: &str) -> Result { let path = auth_path(); let map = read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; - map.get(key) - .cloned() - .ok_or_else(|| anyhow!("no credentials stored for {key:?}")) + match map.get(key) { + Some(AuthEntry::Token(t)) => Ok(t.clone()), + Some(AuthEntry::Pending(_)) => Err(anyhow!("{key:?} is a pending login, not a token")), + None => Err(anyhow!("no credentials stored for {key:?}")), + } } /// Insert or replace the credential at `key`, preserving all other entries. @@ -143,7 +178,54 @@ pub fn load_namespaced_token(key: &str) -> Result { pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { let path = auth_path(); let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(key.to_string(), store.clone()); + map.insert(key.to_string(), AuthEntry::Token(store.clone())); + write_auth_file(&path, &map) +} + +/// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). Errors +/// if the key holds a token instead — the two namespaces shouldn't +/// collide, but a hand-edited file would. 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) +pub fn load_pending_login(key: &str) -> Result { + let path = auth_path(); + let map = + read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + match map.get(key) { + Some(AuthEntry::Pending(p)) => Ok(p.clone()), + Some(AuthEntry::Token(_)) => Err(anyhow!("{key:?} is a token, not a pending login")), + None => Err(anyhow!("no pending login for {key:?}")), + } +} + +/// Persist a `PendingPasteLogin` under `mcp-pending:` (ADR §6.4). +/// Read-modify-write — same serialization caveat as `save_namespaced_token`. +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) +pub fn save_pending_login(key: &str, val: &PendingPasteLogin) -> Result<()> { + let path = auth_path(); + let mut map = read_auth_file(&path).unwrap_or_default(); + map.insert(key.to_string(), AuthEntry::Pending(val.clone())); + write_auth_file(&path, &map) +} + +/// Remove a pending-login entry (consumed on successful `complete_login`, +/// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (complete_login) +pub fn remove_pending_login(key: &str) -> Result<()> { + let path = auth_path(); + let mut map = match read_auth_file(&path) { + Ok(m) => m, + Err(_) => return Ok(()), + }; + if map.remove(key).is_none() { + return Ok(()); + } + if map.is_empty() { + let _ = std::fs::remove_file(&path); + return Ok(()); + } write_auth_file(&path, &map) } @@ -625,6 +707,13 @@ mod tests { assert_eq!(challenge, expected); } + fn token_of(entry: Option<&AuthEntry>) -> &TokenStore { + match entry { + Some(AuthEntry::Token(t)) => t, + other => panic!("expected Token, got {other:?}"), + } + } + #[test] fn read_auth_file_migrates_legacy_single_tenant_format() { let dir = tempfile::tempdir().unwrap(); @@ -634,7 +723,7 @@ mod tests { let map = read_auth_file(&path).unwrap(); assert_eq!(map.len(), 1); assert_eq!( - map.get(CODEX_NAMESPACE).unwrap().access_token, + token_of(map.get(CODEX_NAMESPACE)).access_token, "test_access_token_value" ); } @@ -644,13 +733,13 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("auth.json"); let mut input = HashMap::new(); - input.insert("codex".to_string(), make_store(1)); - input.insert("mcp:linear".to_string(), make_store(2)); + input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); + input.insert("mcp:linear".to_string(), AuthEntry::Token(make_store(2))); write_auth_file(&path, &input).unwrap(); let map = read_auth_file(&path).unwrap(); assert_eq!(map.len(), 2); - assert_eq!(map.get("codex").unwrap().expires_at, 1); - assert_eq!(map.get("mcp:linear").unwrap().expires_at, 2); + assert_eq!(token_of(map.get("codex")).expires_at, 1); + assert_eq!(token_of(map.get("mcp:linear")).expires_at, 2); } #[test] @@ -658,12 +747,12 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("auth.json"); let mut input = HashMap::new(); - input.insert("mcp:github".to_string(), make_store(42)); + 
input.insert("mcp:github".to_string(), AuthEntry::Token(make_store(42))); write_auth_file(&path, &input).unwrap(); let raw = std::fs::read_to_string(&path).unwrap(); assert!(raw.contains("mcp:github")); let map = read_auth_file(&path).unwrap(); - assert_eq!(map.get("mcp:github").unwrap().expires_at, 42); + assert_eq!(token_of(map.get("mcp:github")).expires_at, 42); } #[cfg(unix)] @@ -673,9 +762,83 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("auth.json"); let mut input = HashMap::new(); - input.insert("codex".to_string(), make_store(0)); + input.insert("codex".to_string(), AuthEntry::Token(make_store(0))); write_auth_file(&path, &input).unwrap(); let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777; assert_eq!(mode, 0o600, "expected 0600, got {mode:o}"); } + + fn make_pending() -> PendingPasteLogin { + PendingPasteLogin { + verifier: "test-verifier".to_string(), + state: "test-state".to_string(), + token_url: "https://example.com/token".to_string(), + provider_name: "anthropic-mcp".to_string(), + } + } + + #[test] + fn auth_entry_untagged_round_trip_mixed_shapes() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); + input.insert( + "mcp-pending:linear".to_string(), + AuthEntry::Pending(make_pending()), + ); + write_auth_file(&path, &input).unwrap(); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.len(), 2); + assert_eq!(token_of(map.get("codex")).expires_at, 1); + match map.get("mcp-pending:linear") { + Some(AuthEntry::Pending(p)) => assert_eq!(p.verifier, "test-verifier"), + other => panic!("expected Pending, got {other:?}"), + } + } + + #[cfg(feature = "mcp")] + #[test] + fn pending_login_helpers_round_trip_via_global_path() { + // Drive the disk-backed save/load/remove path end-to-end. 
Touches + // the real `auth_path()` (env HOME) so isolate via a tempdir HOME. + // Single test = no need for an ENV_LOCK mutex. + let dir = tempfile::tempdir().unwrap(); + let prior_home = std::env::var("HOME").ok(); + // SAFETY: single-threaded, restored at end. + unsafe { + std::env::set_var("HOME", dir.path()); + } + let key = "mcp-pending:test-srv"; + save_pending_login(key, &make_pending()).unwrap(); + let got = load_pending_login(key).unwrap(); + assert_eq!(got, make_pending()); + remove_pending_login(key).unwrap(); + assert!(load_pending_login(key).is_err()); + unsafe { + match prior_home { + Some(h) => std::env::set_var("HOME", h), + None => std::env::remove_var("HOME"), + } + } + } + + #[cfg(feature = "mcp")] + #[test] + fn load_namespaced_token_errors_on_pending_entry() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert( + "mcp-pending:srv".to_string(), + AuthEntry::Pending(make_pending()), + ); + write_auth_file(&path, &input).unwrap(); + let map = read_auth_file(&path).unwrap(); + // Directly assert the discriminant rather than calling + // `load_namespaced_token`, which would also touch HOME and race + // the pending-helpers test above. Same intent, smaller blast radius. + let pending = map.get("mcp-pending:srv"); + assert!(matches!(pending, Some(AuthEntry::Pending(_)))); + } } From 79b1e2e5ed96d93fb70044f803875127ae7ef36f Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:32:34 +0000 Subject: [PATCH 48/98] feat(openab-agent/mcp): persist pending paste-login to auth.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `start_paste_login` now writes the `mcp-pending:` entry via `auth::save_pending_login`, dropping the in-memory `pending_logins` map. Aligns runtime state with the ADR §6.4 contract ("kept in TokenStore") so `complete_login` survives an agent restart. 
To keep tests off the real `$HOME/.openab/agent/auth.json` (the cross-module HOME-env race that bit Tick 24), the auth-path becomes an injected `PathBuf` field: `from_config()` defaults to `auth::auth_path()`, `from_config_with_auth_path()` lets tests point at a tempdir. The two tests that exercise the disk path adopt a `mgr_with_tempdir` helper; rejection tests untouched (they error before persist). `auth::{load,save,remove}_pending_login` likewise take `&Path` so they're driven by the injected path, not a global. --- openab-agent/src/auth.rs | 66 ++++++++++++++------------------- openab-agent/src/mcp/runtime.rs | 66 +++++++++++++++++---------------- 2 files changed, 62 insertions(+), 70 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 076b56ea1..c1cda68a2 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -71,7 +71,10 @@ pub enum AuthEntry { Pending(PendingPasteLogin), } -fn auth_path() -> PathBuf { +/// Default location of `auth.json`. Exposed so `McpRuntimeManager` can +/// thread the same path into its constructor and tests can inject a +/// tempdir without touching `$HOME` (which would race cross-module). +pub fn auth_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home) .join(".openab") @@ -184,13 +187,13 @@ pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { /// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). Errors /// if the key holds a token instead — the two namespaces shouldn't -/// collide, but a hand-edited file would. +/// collide, but a hand-edited file would. `path` is injected so the +/// runtime manager can point tests at a tempdir; production callers pass +/// `auth_path()`. 
#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) -pub fn load_pending_login(key: &str) -> Result { - let path = auth_path(); +pub fn load_pending_login(path: &Path, key: &str) -> Result { let map = - read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + read_auth_file(path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; match map.get(key) { Some(AuthEntry::Pending(p)) => Ok(p.clone()), Some(AuthEntry::Token(_)) => Err(anyhow!("{key:?} is a token, not a pending login")), @@ -201,21 +204,18 @@ pub fn load_pending_login(key: &str) -> Result { /// Persist a `PendingPasteLogin` under `mcp-pending:` (ADR §6.4). /// Read-modify-write — same serialization caveat as `save_namespaced_token`. #[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) -pub fn save_pending_login(key: &str, val: &PendingPasteLogin) -> Result<()> { - let path = auth_path(); - let mut map = read_auth_file(&path).unwrap_or_default(); +pub fn save_pending_login(path: &Path, key: &str, val: &PendingPasteLogin) -> Result<()> { + let mut map = read_auth_file(path).unwrap_or_default(); map.insert(key.to_string(), AuthEntry::Pending(val.clone())); - write_auth_file(&path, &map) + write_auth_file(path, &map) } /// Remove a pending-login entry (consumed on successful `complete_login`, /// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. 
#[cfg(feature = "mcp")] #[allow(dead_code)] // wired in next slice (complete_login) -pub fn remove_pending_login(key: &str) -> Result<()> { - let path = auth_path(); - let mut map = match read_auth_file(&path) { +pub fn remove_pending_login(path: &Path, key: &str) -> Result<()> { + let mut map = match read_auth_file(path) { Ok(m) => m, Err(_) => return Ok(()), }; @@ -223,10 +223,10 @@ pub fn remove_pending_login(key: &str) -> Result<()> { return Ok(()); } if map.is_empty() { - let _ = std::fs::remove_file(&path); + let _ = std::fs::remove_file(path); return Ok(()); } - write_auth_file(&path, &map) + write_auth_file(path, &map) } /// Remove the credential at `key`. Idempotent — missing key is not an @@ -799,28 +799,17 @@ mod tests { #[cfg(feature = "mcp")] #[test] - fn pending_login_helpers_round_trip_via_global_path() { - // Drive the disk-backed save/load/remove path end-to-end. Touches - // the real `auth_path()` (env HOME) so isolate via a tempdir HOME. - // Single test = no need for an ENV_LOCK mutex. + fn pending_login_helpers_round_trip_via_injected_path() { + // Tempdir path injected directly — no HOME-env shimming, so this + // test can't race auth-touching tests in other modules. let dir = tempfile::tempdir().unwrap(); - let prior_home = std::env::var("HOME").ok(); - // SAFETY: single-threaded, restored at end. 
- unsafe { - std::env::set_var("HOME", dir.path()); - } + let path = dir.path().join("auth.json"); let key = "mcp-pending:test-srv"; - save_pending_login(key, &make_pending()).unwrap(); - let got = load_pending_login(key).unwrap(); + save_pending_login(&path, key, &make_pending()).unwrap(); + let got = load_pending_login(&path, key).unwrap(); assert_eq!(got, make_pending()); - remove_pending_login(key).unwrap(); - assert!(load_pending_login(key).is_err()); - unsafe { - match prior_home { - Some(h) => std::env::set_var("HOME", h), - None => std::env::remove_var("HOME"), - } - } + remove_pending_login(&path, key).unwrap(); + assert!(load_pending_login(&path, key).is_err()); } #[cfg(feature = "mcp")] @@ -835,9 +824,10 @@ mod tests { ); write_auth_file(&path, &input).unwrap(); let map = read_auth_file(&path).unwrap(); - // Directly assert the discriminant rather than calling - // `load_namespaced_token`, which would also touch HOME and race - // the pending-helpers test above. Same intent, smaller blast radius. + // Assert the discriminant directly. `load_namespaced_token` would + // reach into the real `$HOME/.openab/agent/auth.json` and race + // cross-module tests; the variant check is the actual property + // under test. let pending = map.get("mcp-pending:srv"); assert!(matches!(pending, Some(AuthEntry::Pending(_)))); } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index d363bf487..78e8457d8 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -13,6 +13,7 @@ //! the duration of a child-process spawn + handshake. 
use std::collections::HashMap; +use std::path::PathBuf; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; @@ -25,6 +26,7 @@ use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; use super::flow::init_paste_authorize; use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; +use crate::auth::{auth_path, load_pending_login, save_pending_login, PendingPasteLogin}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { @@ -69,23 +71,6 @@ impl std::fmt::Debug for ServerHandle { } } -/// Transient per-server state captured at `start_paste_login` and consumed -/// by `complete_login` (next slice). `token_url` + `provider_name` are -/// snapshotted up front so a config edit between the two calls can't -/// silently redirect the token exchange. -/// -/// ADR §6.4 says this lives "in TokenStore"; this slice keeps it in -/// process memory only — `auth.json` would need a heterogeneous-entry -/// schema change to hold non-token shapes, deferred to its own slice. -#[derive(Debug, Clone)] -#[allow(dead_code)] // wired in next slice (complete_login) -pub struct PendingPasteLogin { - pub verifier: String, - pub state: String, - pub token_url: String, - pub provider_name: String, -} - /// Public return of `start_paste_login`. The caller relays `authorize_url` /// to the user; `state` is echoed so the agent can show / log it without /// reaching into runtime internals. @@ -98,14 +83,21 @@ pub struct PasteLoginStart { /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] pub struct McpRuntimeManager { handles: Arc>>, - pending_logins: Arc>>, + /// `auth.json` location used for `mcp-pending:` persistence. + /// Injectable so tests can point at a tempdir instead of `$HOME`, + /// avoiding cross-module HOME-env races (Tick 24 lesson + ADR §6.4). 
+ auth_path: PathBuf, } impl McpRuntimeManager { pub fn from_config(cfg: McpConfig) -> Self { + Self::from_config_with_auth_path(cfg, auth_path()) + } + + pub fn from_config_with_auth_path(cfg: McpConfig, auth_path: PathBuf) -> Self { let handles: HashMap<_, _> = cfg .servers .into_iter() @@ -121,7 +113,7 @@ impl McpRuntimeManager { .collect(); Self { handles: Arc::new(RwLock::new(handles)), - pending_logins: Arc::new(RwLock::new(HashMap::new())), + auth_path, } } @@ -180,8 +172,9 @@ impl McpRuntimeManager { /// Begin a paste-back OAuth login for an HTTP server with an `oauth:` /// block (ADR §6.4). Produces the authorize URL the agent surfaces to - /// the user; the matching PKCE verifier + `state` nonce are kept on - /// `self.pending_logins` for `complete_login` (next slice) to consume. + /// the user; the matching PKCE verifier + `state` nonce are persisted + /// under `mcp-pending:` in `auth.json` for `complete_login` + /// (next slice) to consume. /// /// Scoped to **built-in** providers this slice. Custom-provider /// paste-back needs runtime port allocation for the callback (§6.4), @@ -237,28 +230,26 @@ impl McpRuntimeManager { token_url: provider.token_url().to_string(), provider_name: provider_name_of(&provider), }; + save_pending_login(&self.auth_path, &pending_key(name), &pending)?; { let mut handles = self.handles.write().await; if let Some(handle) = handles.get_mut(name) { handle.status = ServerStatus::NeedsAuth; } } - self.pending_logins - .write() - .await - .insert(name.to_string(), pending); Ok(PasteLoginStart { authorize_url: started.url, state: started.state, }) } - /// Borrow the in-flight pending paste-login for `name`. Returns a - /// clone so callers don't hold the lock; `complete_login` (next - /// slice) is the intended consumer. + /// Read the on-disk pending paste-login for `name`. 
`None` if there's + /// no entry or the file is unreadable; `complete_login` (next slice) + /// is the intended consumer and will distinguish the cases via the + /// `auth::load_pending_login` error message. #[allow(dead_code)] // first prod caller is complete_login in next slice pub async fn pending_paste_login(&self, name: &str) -> Option { - self.pending_logins.read().await.get(name).cloned() + load_pending_login(&self.auth_path, &pending_key(name)).ok() } /// Lazy-connect the named server (ADR §5.7). Idempotent if already @@ -334,6 +325,11 @@ fn provider_name_of(provider: &ResolvedProvider) -> String { } } +/// `auth.json` key for an in-flight paste-login (ADR §6.4 namespace). +fn pending_key(name: &str) -> String { + format!("mcp-pending:{name}") +} + /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. @@ -523,6 +519,12 @@ mod tests { mgr.start_paste_login(name).await.unwrap_err().to_string() } + fn mgr_with_tempdir(cfg: McpConfig) -> (McpRuntimeManager, tempfile::TempDir) { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + (McpRuntimeManager::from_config_with_auth_path(cfg, path), dir) + } + #[tokio::test] async fn start_paste_login_builtin_returns_authorize_url_and_pins_pending() { let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); @@ -531,7 +533,7 @@ mod tests { std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-cid"); } let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); + let (mgr, _dir) = mgr_with_tempdir(cfg); let start = mgr.start_paste_login("anthro").await.unwrap(); assert!(start .authorize_url @@ -557,7 +559,7 @@ mod tests { #[tokio::test] async fn start_paste_login_rejects_custom_provider_for_now() { let cfg: McpConfig = 
serde_json::from_str(linear_custom_cfg()).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); + let (mgr, _dir) = mgr_with_tempdir(cfg); let err = start_login_err(&mgr, "linear").await; assert!(err.contains("custom-provider"), "got: {err}"); assert!(mgr.pending_paste_login("linear").await.is_none()); From fd6adcf3d1f124ca151aade6b59d95499c98c399 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:41:22 +0000 Subject: [PATCH 49/98] style(openab-agent/mcp): satisfy rustfmt for mgr_with_tempdir tuple `(McpRuntimeManager::from_config_with_auth_path(cfg, path), dir)` at 60 chars between parens trips rustfmt's tuple width heuristic and gets exploded into a 4-line literal. Bind the manager first so the tuple stays a 2-token one-liner. No behaviour change. --- openab-agent/src/mcp/runtime.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 78e8457d8..9e8a517e1 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -521,8 +521,8 @@ mod tests { fn mgr_with_tempdir(cfg: McpConfig) -> (McpRuntimeManager, tempfile::TempDir) { let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - (McpRuntimeManager::from_config_with_auth_path(cfg, path), dir) + let mgr = McpRuntimeManager::from_config_with_auth_path(cfg, dir.path().join("auth.json")); + (mgr, dir) } #[tokio::test] From 09539858cef83288faf61e9bdc95df6983348ecb Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:54:00 +0000 Subject: [PATCH 50/98] feat(openab-agent/mcp): parse_paste_callback URL helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure URL → authorization-code helper for the upcoming `runtime::complete_login` (ADR §6.4). Validates the `state` echo before returning the `code` so CSRF / cross-flow contamination fails closed before any token-endpoint round-trip. 
Surfaces an `error=` query param verbatim and tolerates extra parameters (`iss`, vendor tracking) without rejecting valid callbacks. Token exchange + runtime wiring follow in the next slice; helper carries `#[allow(dead_code)]` until that lands so the no-feature build stays warning-clean. --- openab-agent/src/mcp/flow.rs | 84 ++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index 39ed8b13c..7d0fd7c80 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -1,9 +1,9 @@ //! OAuth 2.1 paste-back flow primitives (ADR §6.4). PKCE comes from //! `crate::auth::generate_pkce` — shared with the Codex paths so a -//! security-primitive change can't drift between modules. Orchestration -//! (device polling, callback parsing) lands in subsequent slices. +//! security-primitive change can't drift between modules. Device +//! polling orchestration lands in a subsequent slice. -use anyhow::Result; +use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use url::Url; @@ -57,6 +57,35 @@ pub fn init_paste_authorize( }) } +/// Parse a paste-back callback URL into its authorization `code` after +/// validating the `state` echo. OAuth 2.1 RFC 6749 §10.12 + §4.1.2 — a +/// mismatched `state` indicates CSRF / cross-flow contamination and MUST +/// reject the exchange before any token-endpoint round-trip. Tolerates +/// extra query params (vendor-specific tracking, `iss`, etc.). 
+#[allow(dead_code)] // wired in next slice (runtime::complete_login) +pub fn parse_paste_callback(redirect_url: &str, expected_state: &str) -> Result { + let url = Url::parse(redirect_url).map_err(|e| anyhow!("invalid redirect URL: {e}"))?; + let mut code = None; + let mut state = None; + let mut error = None; + for (k, v) in url.query_pairs() { + match k.as_ref() { + "code" => code = Some(v.into_owned()), + "state" => state = Some(v.into_owned()), + "error" => error = Some(v.into_owned()), + _ => {} + } + } + if let Some(err) = error { + return Err(anyhow!("authorize endpoint returned error: {err}")); + } + let got_state = state.ok_or_else(|| anyhow!("callback missing state"))?; + if got_state != expected_state { + return Err(anyhow!("state mismatch; flow rejected")); + } + code.ok_or_else(|| anyhow!("callback missing code")) +} + #[cfg(test)] mod tests { use super::*; @@ -138,4 +167,53 @@ mod tests { assert!(r.url.starts_with("https://linear.app/oauth/authorize?")); assert!(r.url.contains("scope=read+write")); } + + #[test] + fn parse_paste_callback_extracts_code_when_state_matches() { + let url = "http://localhost:53692/callback?code=abc123&state=xyz"; + let code = parse_paste_callback(url, "xyz").unwrap(); + assert_eq!(code, "abc123"); + } + + #[test] + fn parse_paste_callback_tolerates_extra_query_params() { + let url = "http://localhost:53692/cb?iss=https%3A%2F%2Fauth&state=s&code=c&tracking=1"; + let code = parse_paste_callback(url, "s").unwrap(); + assert_eq!(code, "c"); + } + + #[test] + fn parse_paste_callback_rejects_state_mismatch() { + let url = "http://localhost:53692/cb?code=c&state=wrong"; + let err = parse_paste_callback(url, "want").unwrap_err().to_string(); + assert!(err.contains("state mismatch"), "got: {err}"); + } + + #[test] + fn parse_paste_callback_rejects_missing_state() { + let url = "http://localhost:53692/cb?code=c"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("missing state"), "got: 
{err}"); + } + + #[test] + fn parse_paste_callback_rejects_missing_code() { + let url = "http://localhost:53692/cb?state=x"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("missing code"), "got: {err}"); + } + + #[test] + fn parse_paste_callback_surfaces_authorize_error() { + let url = "http://localhost:53692/cb?error=access_denied&state=x"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("access_denied"), "got: {err}"); + } + + #[test] + fn parse_paste_callback_rejects_unparseable_url() { + let url = "not a url"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("invalid redirect URL"), "got: {err}"); + } } From 99cd29c59b174429142743750d029ced80e63f91 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 03:30:53 +0000 Subject: [PATCH 51/98] revert: scope PR #959 to Phase 1 per shaun-agent Balanced split Reverts the 18 OAuth-related commits (2a69bf9..0953985) so this PR contains only ADR + Phase 1 foundation: rmcp dep, mcpServers config loader, stdio transport, meta-tool dispatch, anonymous Streamable HTTP transport body. Phase 2 OAuth work (TokenStore namespacing, provider catalog, paste-back flow primitives, AuthEntry split, parse_paste_callback) is preserved on feat/openab-agent-mcp-oauth-stash and will be proposed as a separate PR once #959 lands. Per shaun-agent auto-screen Balanced recommendation: smaller PRs get reviewed; one mega-PR doesn't. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 330 ++-------------------------- openab-agent/src/mcp/config.rs | 135 +----------- openab-agent/src/mcp/flow.rs | 219 ------------------- openab-agent/src/mcp/meta_tool.rs | 1 - openab-agent/src/mcp/mod.rs | 2 - openab-agent/src/mcp/oauth.rs | 351 ------------------------------ openab-agent/src/mcp/runtime.rs | 320 ++------------------------- 7 files changed, 40 insertions(+), 1318 deletions(-) delete mode 100644 openab-agent/src/mcp/flow.rs delete mode 100644 openab-agent/src/mcp/oauth.rs diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index c1cda68a2..385ccede9 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -2,16 +2,11 @@ use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use std::collections::HashMap; use std::io::{BufRead, Write}; use std::net::TcpListener; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; -/// Namespace key for the existing Codex single-tenant credential. -/// Lives next to future `mcp:` entries inside `auth.json`. -const CODEX_NAMESPACE: &str = "codex"; - const REFRESH_SKEW_SECONDS: u64 = 120; const CODEX_AUTHORIZE_URL: &str = "https://auth.openai.com/oauth/authorize"; @@ -39,42 +34,7 @@ pub struct TokenStore { pub provider: String, } -/// Transient per-server state captured at `start_paste_login` and consumed -/// by `complete_login` (ADR §6.4). Lives in `auth.json` under -/// `mcp-pending:`. `token_url` + `provider_name` are snapshotted -/// up front so a config edit between init and finish can't redirect the -/// token exchange. -/// -/// Unconditionally compiled (not behind `mcp` feature) so a non-mcp build -/// can still parse + round-trip an `auth.json` containing pending entries. 
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub struct PendingPasteLogin { - pub verifier: String, - pub state: String, - pub token_url: String, - pub provider_name: String, -} - -/// `auth.json` value type. Untagged Serde enum: `TokenStore` has required -/// `access_token`, `PendingPasteLogin` has required `verifier` — the -/// shapes are disjoint, so deserialization picks the right variant -/// without an explicit tag (and existing files stay byte-compatible). -/// -/// Per Mira's Tick 39 review: option-A (repurposing TokenStore fields for -/// pending state) would have made the refresh task treat pending entries -/// as "expired tokens" and loop on them. The untagged enum keeps the two -/// state machines completely separate. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(untagged)] -pub enum AuthEntry { - Token(TokenStore), - Pending(PendingPasteLogin), -} - -/// Default location of `auth.json`. Exposed so `McpRuntimeManager` can -/// thread the same path into its constructor and tests can inject a -/// tempdir without touching `$HOME` (which would race cross-module). -pub fn auth_path() -> PathBuf { +fn auth_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home) .join(".openab") @@ -82,36 +42,23 @@ pub fn auth_path() -> PathBuf { .join("auth.json") } -/// Read the `auth.json` map, transparently migrating a legacy single-tenant -/// Codex token file into the new namespaced shape. The migrated map is held -/// in-memory only; the file is rewritten in the new shape on the next save. -/// -/// Discriminates by the top-level `access_token` key — present means the -/// file is the legacy `TokenStore` shape, absent means the new namespaced -/// map. A single JSON parse gives accurate error context either way. 
-fn read_auth_file(path: &Path) -> Result> { - let data = std::fs::read_to_string(path)?; - let value: serde_json::Value = - serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}"))?; - if value.get("access_token").is_some() { - let legacy: TokenStore = serde_json::from_value(value) - .map_err(|e| anyhow!("Invalid auth.json (legacy format): {e}"))?; - let mut map = HashMap::new(); - map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(legacy)); - return Ok(map); - } - serde_json::from_value(value).map_err(|e| anyhow!("Invalid auth.json: {e}")) +pub fn load_tokens() -> Result { + let path = auth_path(); + let data = std::fs::read_to_string(&path).map_err(|_| { + anyhow!( + "No credentials found at {}. Run `openab-agent auth codex-oauth` first.", + path.display() + ) + })?; + serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}")) } -/// Atomically replace `auth.json` with the new map. `fsync(2)` after write -/// satisfies the ADR §6.1 refresh-token rotation contract — without it, a -/// Spot interruption between local write and S3 sync would restore a -/// revoked refresh token from durable storage on the next task start. -fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { +fn save_tokens(store: &TokenStore) -> Result<()> { + let path = auth_path(); if let Some(dir) = path.parent() { std::fs::create_dir_all(dir)?; } - let data = serde_json::to_string_pretty(map)?; + let data = serde_json::to_string_pretty(store)?; #[cfg(unix)] { use std::fs::OpenOptions; @@ -122,134 +69,16 @@ fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> .create(true) .truncate(true) .mode(0o600) - .open(path)?; + .open(&path)?; file.write_all(data.as_bytes())?; - file.sync_all()?; } #[cfg(not(unix))] { - std::fs::write(path, &data)?; + std::fs::write(&path, &data)?; } Ok(()) } -pub fn load_tokens() -> Result { - let path = auth_path(); - let map = read_auth_file(&path).map_err(|_| { - anyhow!( - "No credentials found at {}. 
Run `openab-agent auth codex-oauth` first.", - path.display() - ) - })?; - match map.get(CODEX_NAMESPACE) { - Some(AuthEntry::Token(t)) => Ok(t.clone()), - _ => Err(anyhow!( - "No codex credentials in {}. Run `openab-agent auth codex-oauth` first.", - path.display() - )), - } -} - -fn save_tokens(store: &TokenStore) -> Result<()> { - let path = auth_path(); - let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(store.clone())); - write_auth_file(&path, &map) -} - -/// Look up the credential at `key` (e.g. `mcp:linear`). Returns the codex -/// entry for `key = "codex"`, but prefer `load_tokens()` for that path — -/// this helper exists for MCP server-namespaced lookups (ADR §6.1). -#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) -pub fn load_namespaced_token(key: &str) -> Result { - let path = auth_path(); - let map = - read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; - match map.get(key) { - Some(AuthEntry::Token(t)) => Ok(t.clone()), - Some(AuthEntry::Pending(_)) => Err(anyhow!("{key:?} is a pending login, not a token")), - None => Err(anyhow!("no credentials stored for {key:?}")), - } -} - -/// Insert or replace the credential at `key`, preserving all other entries. -/// Read-modify-write on a single file: callers in the same process must -/// serialize themselves (the lifecycle manager already does per ADR §5.7). -#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) -pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { - let path = auth_path(); - let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(key.to_string(), AuthEntry::Token(store.clone())); - write_auth_file(&path, &map) -} - -/// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). 
Errors -/// if the key holds a token instead — the two namespaces shouldn't -/// collide, but a hand-edited file would. `path` is injected so the -/// runtime manager can point tests at a tempdir; production callers pass -/// `auth_path()`. -#[cfg(feature = "mcp")] -pub fn load_pending_login(path: &Path, key: &str) -> Result { - let map = - read_auth_file(path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; - match map.get(key) { - Some(AuthEntry::Pending(p)) => Ok(p.clone()), - Some(AuthEntry::Token(_)) => Err(anyhow!("{key:?} is a token, not a pending login")), - None => Err(anyhow!("no pending login for {key:?}")), - } -} - -/// Persist a `PendingPasteLogin` under `mcp-pending:` (ADR §6.4). -/// Read-modify-write — same serialization caveat as `save_namespaced_token`. -#[cfg(feature = "mcp")] -pub fn save_pending_login(path: &Path, key: &str, val: &PendingPasteLogin) -> Result<()> { - let mut map = read_auth_file(path).unwrap_or_default(); - map.insert(key.to_string(), AuthEntry::Pending(val.clone())); - write_auth_file(path, &map) -} - -/// Remove a pending-login entry (consumed on successful `complete_login`, -/// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. -#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (complete_login) -pub fn remove_pending_login(path: &Path, key: &str) -> Result<()> { - let mut map = match read_auth_file(path) { - Ok(m) => m, - Err(_) => return Ok(()), - }; - if map.remove(key).is_none() { - return Ok(()); - } - if map.is_empty() { - let _ = std::fs::remove_file(path); - return Ok(()); - } - write_auth_file(path, &map) -} - -/// Remove the credential at `key`. Idempotent — missing key is not an -/// error. If the map becomes empty, the file is deleted so `mcp doctor` -/// can report "no credentials" instead of "empty file". 
-#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp logout / revoked-refresh recovery) -pub fn remove_namespaced_token(key: &str) -> Result<()> { - let path = auth_path(); - let mut map = match read_auth_file(&path) { - Ok(m) => m, - Err(_) => return Ok(()), - }; - if map.remove(key).is_none() { - return Ok(()); - } - if map.is_empty() { - let _ = std::fs::remove_file(&path); - return Ok(()); - } - write_auth_file(&path, &map) -} - fn is_expired(store: &TokenStore) -> bool { let now = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -309,7 +138,7 @@ async fn refresh_token(store: &TokenStore) -> Result { }) } -pub fn generate_pkce() -> (String, String) { +fn generate_pkce() -> (String, String) { let mut buf = [0u8; 32]; getrandom::fill(&mut buf).expect("getrandom failed"); let verifier = URL_SAFE_NO_PAD.encode(buf); @@ -706,129 +535,4 @@ mod tests { let expected = URL_SAFE_NO_PAD.encode(Sha256::digest(verifier.as_bytes())); assert_eq!(challenge, expected); } - - fn token_of(entry: Option<&AuthEntry>) -> &TokenStore { - match entry { - Some(AuthEntry::Token(t)) => t, - other => panic!("expected Token, got {other:?}"), - } - } - - #[test] - fn read_auth_file_migrates_legacy_single_tenant_format() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let legacy = serde_json::to_string_pretty(&make_store(9_999_999_999)).unwrap(); - std::fs::write(&path, legacy).unwrap(); - let map = read_auth_file(&path).unwrap(); - assert_eq!(map.len(), 1); - assert_eq!( - token_of(map.get(CODEX_NAMESPACE)).access_token, - "test_access_token_value" - ); - } - - #[test] - fn read_auth_file_parses_new_namespaced_format() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); - input.insert("mcp:linear".to_string(), AuthEntry::Token(make_store(2))); - write_auth_file(&path, &input).unwrap(); - let 
map = read_auth_file(&path).unwrap(); - assert_eq!(map.len(), 2); - assert_eq!(token_of(map.get("codex")).expires_at, 1); - assert_eq!(token_of(map.get("mcp:linear")).expires_at, 2); - } - - #[test] - fn write_auth_file_round_trips_through_disk() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert("mcp:github".to_string(), AuthEntry::Token(make_store(42))); - write_auth_file(&path, &input).unwrap(); - let raw = std::fs::read_to_string(&path).unwrap(); - assert!(raw.contains("mcp:github")); - let map = read_auth_file(&path).unwrap(); - assert_eq!(token_of(map.get("mcp:github")).expires_at, 42); - } - - #[cfg(unix)] - #[test] - fn write_auth_file_creates_file_with_0600_mode() { - use std::os::unix::fs::PermissionsExt; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert("codex".to_string(), AuthEntry::Token(make_store(0))); - write_auth_file(&path, &input).unwrap(); - let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777; - assert_eq!(mode, 0o600, "expected 0600, got {mode:o}"); - } - - fn make_pending() -> PendingPasteLogin { - PendingPasteLogin { - verifier: "test-verifier".to_string(), - state: "test-state".to_string(), - token_url: "https://example.com/token".to_string(), - provider_name: "anthropic-mcp".to_string(), - } - } - - #[test] - fn auth_entry_untagged_round_trip_mixed_shapes() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); - input.insert( - "mcp-pending:linear".to_string(), - AuthEntry::Pending(make_pending()), - ); - write_auth_file(&path, &input).unwrap(); - let map = read_auth_file(&path).unwrap(); - assert_eq!(map.len(), 2); - assert_eq!(token_of(map.get("codex")).expires_at, 1); - match map.get("mcp-pending:linear") { - 
Some(AuthEntry::Pending(p)) => assert_eq!(p.verifier, "test-verifier"), - other => panic!("expected Pending, got {other:?}"), - } - } - - #[cfg(feature = "mcp")] - #[test] - fn pending_login_helpers_round_trip_via_injected_path() { - // Tempdir path injected directly — no HOME-env shimming, so this - // test can't race auth-touching tests in other modules. - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let key = "mcp-pending:test-srv"; - save_pending_login(&path, key, &make_pending()).unwrap(); - let got = load_pending_login(&path, key).unwrap(); - assert_eq!(got, make_pending()); - remove_pending_login(&path, key).unwrap(); - assert!(load_pending_login(&path, key).is_err()); - } - - #[cfg(feature = "mcp")] - #[test] - fn load_namespaced_token_errors_on_pending_entry() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert( - "mcp-pending:srv".to_string(), - AuthEntry::Pending(make_pending()), - ); - write_auth_file(&path, &input).unwrap(); - let map = read_auth_file(&path).unwrap(); - // Assert the discriminant directly. `load_namespaced_token` would - // reach into the real `$HOME/.openab/agent/auth.json` and race - // cross-module tests; the variant check is the actual property - // under test. - let pending = map.get("mcp-pending:srv"); - assert!(matches!(pending, Some(AuthEntry::Pending(_)))); - } } diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 003bb5310..742459430 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -57,46 +57,15 @@ pub struct ToolFilter { pub exclude: Vec, } -/// OAuth block. -/// -/// `provider` selects a built-in spec from `oauth::builtin()`. Setting it -/// to an unknown name + supplying `authorize_url` / `token_url` defines a -/// custom OAuth 2.1 provider (ADR §6.3). 
`discovery: true` opts into -/// RFC 8414 dynamic discovery and requires a non-empty -/// `discovery_allowlist` of domains (§6.4 SSRF guard). -#[derive(Debug, Default, Clone, Serialize, Deserialize)] +/// OAuth block. Phase 1 only parses `provider` + `scopes`; custom-provider +/// fields (§6.3: `authorize_url`, `token_url`, `device_authorization_endpoint`, +/// `discovery`, `discovery_allowlist`) land with the Phase 2 auth slice. +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct OAuthConfig { #[serde(default)] pub provider: Option, #[serde(default)] pub scopes: Vec, - #[serde(default)] - pub authorize_url: Option, - #[serde(default)] - pub token_url: Option, - #[serde(default)] - pub client_id: Option, - #[serde(default)] - pub device_authorization_endpoint: Option, - #[serde(default)] - pub discovery: bool, - #[serde(default)] - pub discovery_allowlist: Vec, -} - -impl OAuthConfig { - /// Boot-time validation (ADR §6.3 / §6.4). `discovery: true` without an - /// explicit allowlist is rejected — RFC 8414 lookups in multi-tenant - /// deployments would otherwise become an SSRF vector. - pub fn validate(&self, server: &str) -> Result<()> { - if self.discovery && self.discovery_allowlist.is_empty() { - return Err(anyhow!( - "mcp server {server:?}: oauth.discovery=true requires \ - a non-empty oauth.discovery_allowlist (ADR §6.3)" - )); - } - Ok(()) - } } impl McpConfig { @@ -120,24 +89,9 @@ impl McpConfig { let layer = Self::load_file(path)?; merged.servers.extend(layer.servers); } - merged.validate()?; Ok(merged) } - /// Validate every server's `oauth` block (ADR §6.3 boot check). Returns - /// the first failure — finer-grained per-server isolation lives in §5.6. - pub fn validate(&self) -> Result<()> { - for (name, server) in &self.servers { - if let ServerConfig::Http { - oauth: Some(oauth), .. 
- } = server - { - oauth.validate(name)?; - } - } - Ok(()) - } - fn load_file(path: &Path) -> Result { let raw = std::fs::read_to_string(path) .with_context(|| format!("read mcp config {}", path.display()))?; @@ -333,85 +287,4 @@ mod tests { _ => unreachable!(), } } - - #[test] - fn parses_custom_oauth_provider_fields() { - let json = r#"{ - "mcpServers": { - "custom": { - "type": "http", - "url": "https://example.com/mcp", - "oauth": { - "provider": "custom", - "authorize_url": "https://example.com/oauth/authorize", - "token_url": "https://example.com/oauth/token", - "client_id": "abc123", - "device_authorization_endpoint": "https://example.com/oauth/device", - "discovery": true, - "discovery_allowlist": ["*.example.com"] - } - } - } - }"#; - let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let ServerConfig::Http { - oauth: Some(oauth), .. - } = cfg.servers.get("custom").unwrap() - else { - panic!("expected http with oauth"); - }; - assert_eq!( - oauth.authorize_url.as_deref(), - Some("https://example.com/oauth/authorize"), - ); - assert_eq!( - oauth.token_url.as_deref(), - Some("https://example.com/oauth/token"), - ); - assert_eq!(oauth.client_id.as_deref(), Some("abc123")); - assert_eq!( - oauth.device_authorization_endpoint.as_deref(), - Some("https://example.com/oauth/device"), - ); - assert!(oauth.discovery); - assert_eq!(oauth.discovery_allowlist, vec!["*.example.com".to_string()]); - } - - #[test] - fn validate_rejects_discovery_without_allowlist() { - let oauth = OAuthConfig { - provider: Some("custom".into()), - discovery: true, - ..Default::default() - }; - let err = oauth.validate("srv").unwrap_err().to_string(); - assert!(err.contains("discovery_allowlist"), "got: {err}"); - assert!(err.contains("srv"), "got: {err}"); - } - - #[test] - fn validate_accepts_discovery_with_allowlist() { - let oauth = OAuthConfig { - provider: Some("custom".into()), - discovery: true, - discovery_allowlist: vec!["*.example.com".into()], - ..Default::default() - 
}; - oauth.validate("srv").unwrap(); - } - - #[test] - fn load_layered_rejects_invalid_discovery_config() { - let dir = tempfile::tempdir().unwrap(); - let project = dir.path().join("project.json"); - std::fs::write( - &project, - r#"{"mcpServers":{"bad":{"type":"http","url":"https://example.com","oauth":{"provider":"custom","discovery":true}}}}"#, - ) - .unwrap(); - let err = McpConfig::load_layered(None, Some(&project)) - .unwrap_err() - .to_string(); - assert!(err.contains("discovery_allowlist"), "got: {err}"); - } } diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs deleted file mode 100644 index 7d0fd7c80..000000000 --- a/openab-agent/src/mcp/flow.rs +++ /dev/null @@ -1,219 +0,0 @@ -//! OAuth 2.1 paste-back flow primitives (ADR §6.4). PKCE comes from -//! `crate::auth::generate_pkce` — shared with the Codex paths so a -//! security-primitive change can't drift between modules. Device -//! polling orchestration lands in a subsequent slice. - -use anyhow::{anyhow, Result}; -use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; -use url::Url; - -use super::oauth::ResolvedProvider; -use crate::auth::generate_pkce; - -/// 16-byte URL-safe `state` nonce for the OAuth authorize URL. -fn generate_state() -> String { - let mut buf = [0u8; 16]; - getrandom::fill(&mut buf).expect("getrandom failed"); - URL_SAFE_NO_PAD.encode(buf) -} - -/// Result of `init_paste_authorize`: the URL to surface to the user, plus -/// the `code_verifier` + `state` the caller must persist under the -/// pending-login key for `complete_login` to validate the callback. -pub struct PasteAuthorize { - pub url: String, - pub code_verifier: String, - pub state: String, -} - -/// Start a paste-back OAuth 2.1 authorize flow. Generates the PKCE pair -/// and state nonce internally so the caller can't pair them up wrong; -/// builds the RFC 6749 authorize URL with `S256` PKCE and space-joined -/// scopes. 
`client_id` is caller-supplied: built-ins look it up via a -/// hard-coded helper (mirroring `auth::codex_client_id`); custom -/// providers carry it on `ResolvedProvider::Custom`. `redirect_uri` is -/// the provider's pinned callback for built-ins or a runtime-bound -/// `localhost:` for custom paste-back flows. -pub fn init_paste_authorize( - provider: &ResolvedProvider, - client_id: &str, - redirect_uri: &str, -) -> Result { - let (code_verifier, code_challenge) = generate_pkce(); - let state = generate_state(); - let mut url = Url::parse(provider.authorize_url())?; - url.query_pairs_mut() - .append_pair("response_type", "code") - .append_pair("client_id", client_id) - .append_pair("redirect_uri", redirect_uri) - .append_pair("code_challenge", &code_challenge) - .append_pair("code_challenge_method", "S256") - .append_pair("state", &state) - .append_pair("scope", &provider.scopes().join(" ")); - Ok(PasteAuthorize { - url: url.to_string(), - code_verifier, - state, - }) -} - -/// Parse a paste-back callback URL into its authorization `code` after -/// validating the `state` echo. OAuth 2.1 RFC 6749 §10.12 + §4.1.2 — a -/// mismatched `state` indicates CSRF / cross-flow contamination and MUST -/// reject the exchange before any token-endpoint round-trip. Tolerates -/// extra query params (vendor-specific tracking, `iss`, etc.). 
-#[allow(dead_code)] // wired in next slice (runtime::complete_login) -pub fn parse_paste_callback(redirect_url: &str, expected_state: &str) -> Result { - let url = Url::parse(redirect_url).map_err(|e| anyhow!("invalid redirect URL: {e}"))?; - let mut code = None; - let mut state = None; - let mut error = None; - for (k, v) in url.query_pairs() { - match k.as_ref() { - "code" => code = Some(v.into_owned()), - "state" => state = Some(v.into_owned()), - "error" => error = Some(v.into_owned()), - _ => {} - } - } - if let Some(err) = error { - return Err(anyhow!("authorize endpoint returned error: {err}")); - } - let got_state = state.ok_or_else(|| anyhow!("callback missing state"))?; - if got_state != expected_state { - return Err(anyhow!("state mismatch; flow rejected")); - } - code.ok_or_else(|| anyhow!("callback missing code")) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::mcp::config::OAuthConfig; - use crate::mcp::oauth::resolve; - - const TEST_REDIRECT: &str = "http://localhost:53692/callback"; - - #[test] - fn state_is_url_safe_and_unique() { - let s = generate_state(); - let url_safe = s - .chars() - .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_'); - assert!(url_safe); - assert_ne!(s, generate_state()); - } - - fn builtin_provider() -> ResolvedProvider { - let cfg = OAuthConfig { - provider: Some("anthropic-mcp".to_string()), - ..Default::default() - }; - resolve(&cfg).unwrap() - } - - #[test] - fn init_paste_authorize_threads_pkce_and_state_into_url() { - let p = builtin_provider(); - let r = init_paste_authorize(&p, "client-xyz", TEST_REDIRECT).unwrap(); - assert!(r.url.starts_with("https://claude.ai/oauth/authorize?")); - assert!(r.url.contains("response_type=code")); - assert!(r.url.contains("client_id=client-xyz")); - assert!(r.url.contains("code_challenge_method=S256")); - assert!(r.url.contains(&format!("state={}", r.state))); - assert!(!r.code_verifier.is_empty()); - } - - #[test] - fn 
init_paste_authorize_percent_encodes_redirect_uri() { - let p = builtin_provider(); - let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); - let want = "redirect_uri=http%3A%2F%2Flocalhost%3A53692%2Fcallback"; - assert!(r.url.contains(want)); - } - - #[test] - fn init_paste_authorize_form_encodes_scope_spaces_as_plus() { - let p = builtin_provider(); - let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); - assert!(r.url.contains("scope=org%3Acreate_api_key")); - assert!(r.url.contains("user%3Amcp_servers")); - } - - #[test] - fn init_paste_authorize_rejects_unparseable_authorize_url() { - let cfg = OAuthConfig { - provider: Some("broken".to_string()), - authorize_url: Some("not a url".to_string()), - token_url: Some("https://example.com/token".to_string()), - ..Default::default() - }; - let p = resolve(&cfg).unwrap(); - assert!(init_paste_authorize(&p, "c", TEST_REDIRECT).is_err()); - } - - #[test] - fn init_paste_authorize_for_custom_provider() { - let cfg = OAuthConfig { - provider: Some("linear".to_string()), - authorize_url: Some("https://linear.app/oauth/authorize".to_string()), - token_url: Some("https://api.linear.app/oauth/token".to_string()), - client_id: Some("linear-client".to_string()), - scopes: vec!["read".to_string(), "write".to_string()], - ..Default::default() - }; - let p = resolve(&cfg).unwrap(); - let r = init_paste_authorize(&p, "linear-client", TEST_REDIRECT).unwrap(); - assert!(r.url.starts_with("https://linear.app/oauth/authorize?")); - assert!(r.url.contains("scope=read+write")); - } - - #[test] - fn parse_paste_callback_extracts_code_when_state_matches() { - let url = "http://localhost:53692/callback?code=abc123&state=xyz"; - let code = parse_paste_callback(url, "xyz").unwrap(); - assert_eq!(code, "abc123"); - } - - #[test] - fn parse_paste_callback_tolerates_extra_query_params() { - let url = "http://localhost:53692/cb?iss=https%3A%2F%2Fauth&state=s&code=c&tracking=1"; - let code = parse_paste_callback(url, 
"s").unwrap(); - assert_eq!(code, "c"); - } - - #[test] - fn parse_paste_callback_rejects_state_mismatch() { - let url = "http://localhost:53692/cb?code=c&state=wrong"; - let err = parse_paste_callback(url, "want").unwrap_err().to_string(); - assert!(err.contains("state mismatch"), "got: {err}"); - } - - #[test] - fn parse_paste_callback_rejects_missing_state() { - let url = "http://localhost:53692/cb?code=c"; - let err = parse_paste_callback(url, "x").unwrap_err().to_string(); - assert!(err.contains("missing state"), "got: {err}"); - } - - #[test] - fn parse_paste_callback_rejects_missing_code() { - let url = "http://localhost:53692/cb?state=x"; - let err = parse_paste_callback(url, "x").unwrap_err().to_string(); - assert!(err.contains("missing code"), "got: {err}"); - } - - #[test] - fn parse_paste_callback_surfaces_authorize_error() { - let url = "http://localhost:53692/cb?error=access_denied&state=x"; - let err = parse_paste_callback(url, "x").unwrap_err().to_string(); - assert!(err.contains("access_denied"), "got: {err}"); - } - - #[test] - fn parse_paste_callback_rejects_unparseable_url() { - let url = "not a url"; - let err = parse_paste_callback(url, "x").unwrap_err().to_string(); - assert!(err.contains("invalid redirect URL"), "got: {err}"); - } -} diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 7ecfe0034..557badf4c 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -186,7 +186,6 @@ fn status_label(status: &ServerStatus) -> &'static str { ServerStatus::Disconnected => "disconnected", ServerStatus::Connecting => "connecting", ServerStatus::Connected => "connected", - ServerStatus::NeedsAuth => "needs_auth", ServerStatus::Failed(_) => "failed", } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 81278aa4e..55f210c16 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,9 +1,7 @@ //! Native MCP client. 
See `docs/adr/openab-agent-mcp.md`. pub mod config; -pub mod flow; pub mod meta_tool; -pub mod oauth; pub mod runtime; use serde_json::json; diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs deleted file mode 100644 index f3ea31661..000000000 --- a/openab-agent/src/mcp/oauth.rs +++ /dev/null @@ -1,351 +0,0 @@ -//! OAuth provider catalog (ADR §6.2) + custom-provider resolution (§6.3). -//! Wiring into the rmcp Streamable HTTP transport + agent-guided flows -//! (§6.4) lands in subsequent slices; this module is the data layer the -//! login / refresh code will dispatch through. - -// The §6.4 login slice is the first prod caller — until then, every item -// here is reachable only via the unit tests below, so `cargo clippy -// --features mcp -- -D warnings` would flag them as dead. Module-scope -// allow rather than per-item once that slice lands. -#![allow(dead_code)] - -use anyhow::{anyhow, Result}; - -use super::config::OAuthConfig; - -/// Static description of a single built-in OAuth provider. `default_scopes` -/// is the minimum set the agent will request when `oauth.scopes` is omitted -/// from the server config; per-server overrides win when present. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct ProviderSpec { - pub name: &'static str, - pub authorize_url: &'static str, - pub token_url: &'static str, - pub callback: &'static str, - pub default_scopes: &'static [&'static str], -} - -/// Anthropic MCP (claude.ai). Scope list from ADR §6.2 — `org:create_api_key` -/// is the broadest grant; consumers should narrow via per-server overrides. 
-pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { - name: "anthropic-mcp", - authorize_url: "https://claude.ai/oauth/authorize", - token_url: "https://platform.claude.com/v1/oauth/token", - callback: "http://localhost:53692/callback", - default_scopes: &[ - "org:create_api_key", - "user:profile", - "user:inference", - "user:sessions:claude_code", - "user:mcp_servers", - "user:file_upload", - ], -}; - -const BUILTINS: &[ProviderSpec] = &[ANTHROPIC_MCP]; - -/// Look up a built-in `ProviderSpec` by config name. Returns `None` for -/// custom providers (§6.3) and for unknown names. -pub fn builtin(name: &str) -> Option { - BUILTINS.iter().copied().find(|spec| spec.name == name) -} - -/// Resolve a built-in provider's OAuth `client_id`. Mirrors -/// `auth::codex_client_id`'s env-var-override pattern but without a hard- -/// coded default — the Anthropic MCP public client_id isn't yet pinned in -/// this repo, so requiring the env var fails fast with a useful error -/// rather than silently dialing with a placeholder. Replace with a -/// hard-coded default once a real value is published. -pub fn builtin_client_id(provider: &str) -> Result { - let env_var = match provider { - "anthropic-mcp" => "OPENAB_MCP_ANTHROPIC_CLIENT_ID", - other => { - return Err(anyhow!( - "no built-in client_id mapping for provider {other:?}" - )); - } - }; - std::env::var(env_var).map_err(|_| { - anyhow!( - "built-in provider {provider:?} requires env var {env_var} \ - (client_id of the provider's OAuth app)" - ) - }) -} - -/// Effective per-server OAuth parameters after resolving the built-in catalog -/// and `OAuthConfig` overrides. -/// -/// The two variants encode invariants that an `Option`-heavy struct couldn't: -/// built-ins always pin a `callback` (their PKCE port is hard-coded in the -/// provider's app registration) and never carry a `client_id` (the §6.4 flow -/// code owns it, mirroring `auth.rs::codex_client_id()`). 
Custom providers -/// flip both: §6.4 allocates a free port at login time, and `client_id` -/// comes from config (OAuth 2.1 public clients vary on registration). -/// -/// `device_authorization_endpoint` only appears on `Custom` — adding device -/// support for a built-in provider is a `ProviderSpec` schema change, not a -/// config flag. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ResolvedProvider { - Builtin { - provider_name: &'static str, - authorize_url: &'static str, - token_url: &'static str, - callback: &'static str, - scopes: Vec, - }, - Custom { - provider_name: String, - authorize_url: String, - token_url: String, - client_id: Option, - device_authorization_endpoint: Option, - scopes: Vec, - }, -} - -impl ResolvedProvider { - /// Accessor for the shared `authorize_url` field. Callers that don't - /// need to distinguish built-in vs custom can skip the `match`. - pub fn authorize_url(&self) -> &str { - match self { - Self::Builtin { authorize_url, .. } => authorize_url, - Self::Custom { authorize_url, .. } => authorize_url, - } - } - - /// Accessor for the shared `token_url` field. - pub fn token_url(&self) -> &str { - match self { - Self::Builtin { token_url, .. } => token_url, - Self::Custom { token_url, .. } => token_url, - } - } - - /// Accessor for the shared scope list. - pub fn scopes(&self) -> &[String] { - match self { - Self::Builtin { scopes, .. } | Self::Custom { scopes, .. } => scopes, - } - } -} - -/// Resolve a server's `oauth:` block. Built-in providers come from -/// `builtin()`; unknown providers fall through to the §6.3 custom path, -/// which requires `authorize_url` + `token_url` on the config. -/// -/// `OAuthConfig::scopes`, when non-empty, replaces the built-in defaults -/// entirely — the caller never needs to merge. 
-pub fn resolve(cfg: &OAuthConfig) -> Result { - let provider = cfg - .provider - .as_deref() - .ok_or_else(|| anyhow!("oauth.provider is required"))?; - match builtin(provider) { - Some(spec) => Ok(resolve_builtin(spec, cfg)), - None => resolve_custom(provider, cfg), - } -} - -fn resolve_builtin(spec: ProviderSpec, cfg: &OAuthConfig) -> ResolvedProvider { - let scopes = if cfg.scopes.is_empty() { - spec.default_scopes.iter().map(|s| s.to_string()).collect() - } else { - cfg.scopes.clone() - }; - ResolvedProvider::Builtin { - provider_name: spec.name, - authorize_url: spec.authorize_url, - token_url: spec.token_url, - callback: spec.callback, - scopes, - } -} - -fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result { - let authorize_url = cfg.authorize_url.clone().ok_or_else(|| { - anyhow!("custom oauth provider {provider:?}: oauth.authorize_url is required (ADR §6.3)") - })?; - let token_url = cfg.token_url.clone().ok_or_else(|| { - anyhow!("custom oauth provider {provider:?}: oauth.token_url is required (ADR §6.3)") - })?; - Ok(ResolvedProvider::Custom { - provider_name: provider.to_string(), - authorize_url, - token_url, - client_id: cfg.client_id.clone(), - device_authorization_endpoint: cfg.device_authorization_endpoint.clone(), - scopes: cfg.scopes.clone(), - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - // Both env-touching tests below race the same OS env var; serialize - // them per the runbook's Tick 24 lesson (acp.rs ANTHROPIC_API_KEY race). - static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); - - #[test] - fn builtin_client_id_requires_env_var() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // SAFETY: serialized via ENV_LOCK; isolated env key. 
- unsafe { - std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); - } - let err = builtin_client_id("anthropic-mcp").unwrap_err().to_string(); - assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); - } - - #[test] - fn builtin_client_id_uses_env_var_when_set() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // SAFETY: serialized via ENV_LOCK; isolated env key. - unsafe { - std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-test-id"); - } - let id = builtin_client_id("anthropic-mcp").unwrap(); - assert_eq!(id, "anth-test-id"); - unsafe { - std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); - } - } - - #[test] - fn builtin_client_id_rejects_unknown_provider() { - let err = builtin_client_id("does-not-exist").unwrap_err().to_string(); - assert!(err.contains("does-not-exist"), "got: {err}"); - } - - #[test] - fn anthropic_mcp_spec_matches_adr_table() { - let spec = builtin("anthropic-mcp").expect("anthropic-mcp is built-in"); - assert_eq!(spec.authorize_url, "https://claude.ai/oauth/authorize"); - assert_eq!(spec.token_url, "https://platform.claude.com/v1/oauth/token"); - assert_eq!(spec.callback, "http://localhost:53692/callback"); - assert!(spec.default_scopes.contains(&"user:mcp_servers")); - } - - #[test] - fn unknown_provider_returns_none() { - assert!(builtin("does-not-exist").is_none()); - assert!(builtin("").is_none()); - } - - #[test] - fn resolve_builtin_uses_default_scopes_when_config_omits_them() { - let cfg = OAuthConfig { - provider: Some("anthropic-mcp".to_string()), - ..Default::default() - }; - let ResolvedProvider::Builtin { - provider_name, - callback, - scopes, - .. 
- } = resolve(&cfg).unwrap() - else { - panic!("expected Builtin variant"); - }; - assert_eq!(provider_name, "anthropic-mcp"); - assert_eq!(callback, ANTHROPIC_MCP.callback); - assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); - } - - #[test] - fn resolve_builtin_uses_config_scopes_when_provided() { - let cfg = OAuthConfig { - provider: Some("anthropic-mcp".to_string()), - scopes: vec!["user:profile".to_string(), "user:inference".to_string()], - ..Default::default() - }; - let r = resolve(&cfg).unwrap(); - assert_eq!(r.scopes(), &["user:profile", "user:inference"]); - } - - #[test] - fn resolve_rejects_missing_provider() { - let err = resolve(&OAuthConfig::default()).unwrap_err().to_string(); - assert!(err.contains("required"), "got: {err}"); - } - - #[test] - fn resolve_custom_uses_config_urls_and_propagates_device_endpoint() { - let cfg = OAuthConfig { - provider: Some("linear".to_string()), - authorize_url: Some("https://linear.app/oauth/authorize".to_string()), - token_url: Some("https://api.linear.app/oauth/token".to_string()), - client_id: Some("client-abc".to_string()), - device_authorization_endpoint: Some("https://linear.app/oauth/device".to_string()), - scopes: vec!["read".to_string(), "write".to_string()], - ..Default::default() - }; - let ResolvedProvider::Custom { - provider_name, - authorize_url, - token_url, - client_id, - device_authorization_endpoint, - scopes, - } = resolve(&cfg).unwrap() - else { - panic!("expected Custom variant"); - }; - assert_eq!(provider_name, "linear"); - assert_eq!(authorize_url, "https://linear.app/oauth/authorize"); - assert_eq!(token_url, "https://api.linear.app/oauth/token"); - assert_eq!(client_id.as_deref(), Some("client-abc")); - assert_eq!( - device_authorization_endpoint.as_deref(), - Some("https://linear.app/oauth/device"), - ); - assert_eq!(scopes, vec!["read", "write"]); - } - - #[test] - fn resolve_custom_minimal_two_urls_only() { - let cfg = OAuthConfig { - provider: Some("acme".to_string()), - 
authorize_url: Some("https://acme.example/authorize".to_string()), - token_url: Some("https://acme.example/token".to_string()), - ..Default::default() - }; - let ResolvedProvider::Custom { - client_id, - device_authorization_endpoint, - scopes, - .. - } = resolve(&cfg).unwrap() - else { - panic!("expected Custom variant"); - }; - assert!(client_id.is_none()); - assert!(device_authorization_endpoint.is_none()); - assert!(scopes.is_empty()); - } - - #[test] - fn resolve_custom_rejects_missing_authorize_url() { - let cfg = OAuthConfig { - provider: Some("custom".to_string()), - token_url: Some("https://example.com/token".to_string()), - ..Default::default() - }; - let err = resolve(&cfg).unwrap_err().to_string(); - assert!(err.contains("authorize_url"), "got: {err}"); - assert!(err.contains("custom"), "got: {err}"); - } - - #[test] - fn resolve_custom_rejects_missing_token_url() { - let cfg = OAuthConfig { - provider: Some("custom".to_string()), - authorize_url: Some("https://example.com/authorize".to_string()), - ..Default::default() - }; - let err = resolve(&cfg).unwrap_err().to_string(); - assert!(err.contains("token_url"), "got: {err}"); - } -} diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 9e8a517e1..933fd23d5 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -13,7 +13,6 @@ //! the duration of a child-process spawn + handshake. 
use std::collections::HashMap; -use std::path::PathBuf; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; @@ -24,16 +23,13 @@ use tokio::process::Command; use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -use super::flow::init_paste_authorize; -use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; -use crate::auth::{auth_path, load_pending_login, save_pending_login, PendingPasteLogin}; +#[allow(dead_code)] // NeedsAuth lands with the Phase 2 OAuth slice (ADR §5.7) #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { Disconnected, Connecting, Connected, - NeedsAuth, Failed(String), } @@ -43,7 +39,6 @@ impl ServerStatus { ServerStatus::Disconnected => "○", ServerStatus::Connecting => "◐", ServerStatus::Connected => "●", - ServerStatus::NeedsAuth => "◌", ServerStatus::Failed(_) => "✗", } } @@ -71,33 +66,15 @@ impl std::fmt::Debug for ServerHandle { } } -/// Public return of `start_paste_login`. The caller relays `authorize_url` -/// to the user; `state` is echoed so the agent can show / log it without -/// reaching into runtime internals. -#[derive(Debug, Clone)] -#[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) -pub struct PasteLoginStart { - pub authorize_url: String, - pub state: String, -} - /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. -#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone)] pub struct McpRuntimeManager { handles: Arc>>, - /// `auth.json` location used for `mcp-pending:` persistence. - /// Injectable so tests can point at a tempdir instead of `$HOME`, - /// avoiding cross-module HOME-env races (Tick 24 lesson + ADR §6.4). 
- auth_path: PathBuf, } impl McpRuntimeManager { pub fn from_config(cfg: McpConfig) -> Self { - Self::from_config_with_auth_path(cfg, auth_path()) - } - - pub fn from_config_with_auth_path(cfg: McpConfig, auth_path: PathBuf) -> Self { let handles: HashMap<_, _> = cfg .servers .into_iter() @@ -113,7 +90,6 @@ impl McpRuntimeManager { .collect(); Self { handles: Arc::new(RwLock::new(handles)), - auth_path, } } @@ -170,93 +146,9 @@ impl McpRuntimeManager { out } - /// Begin a paste-back OAuth login for an HTTP server with an `oauth:` - /// block (ADR §6.4). Produces the authorize URL the agent surfaces to - /// the user; the matching PKCE verifier + `state` nonce are persisted - /// under `mcp-pending:` in `auth.json` for `complete_login` - /// (next slice) to consume. - /// - /// Scoped to **built-in** providers this slice. Custom-provider - /// paste-back needs runtime port allocation for the callback (§6.4), - /// and any provider that advertises a `device_authorization_endpoint` - /// should run device-code instead (§6.4 selection logic). Both errors - /// are explicit so the LLM can pick a different action. - #[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) - pub async fn start_paste_login(&self, name: &str) -> Result { - let oauth_cfg = { - let guard = self.handles.read().await; - let handle = guard - .get(name) - .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; - match handle.config.resolved(name)? { - ServerConfig::Http { - oauth: Some(oauth), .. - } => oauth, - ServerConfig::Http { oauth: None, .. } => { - return Err(anyhow!("mcp server {name:?} has no oauth block")); - } - ServerConfig::Stdio { .. } => { - return Err(anyhow!("mcp server {name:?} is stdio, not http+oauth")); - } - } - }; - - let provider = resolve(&oauth_cfg)?; - let (client_id, redirect_uri) = match &provider { - ResolvedProvider::Builtin { - provider_name, - callback, - .. 
- } => (builtin_client_id(provider_name)?, (*callback).to_string()), - ResolvedProvider::Custom { - device_authorization_endpoint: Some(_), - .. - } => { - return Err(anyhow!( - "mcp server {name:?} has a device endpoint; use device flow" - )); - } - ResolvedProvider::Custom { .. } => { - return Err(anyhow!( - "mcp server {name:?}: custom-provider paste-back not yet supported" - )); - } - }; - - let started = init_paste_authorize(&provider, &client_id, &redirect_uri)?; - let pending = PendingPasteLogin { - verifier: started.code_verifier, - state: started.state.clone(), - token_url: provider.token_url().to_string(), - provider_name: provider_name_of(&provider), - }; - save_pending_login(&self.auth_path, &pending_key(name), &pending)?; - { - let mut handles = self.handles.write().await; - if let Some(handle) = handles.get_mut(name) { - handle.status = ServerStatus::NeedsAuth; - } - } - Ok(PasteLoginStart { - authorize_url: started.url, - state: started.state, - }) - } - - /// Read the on-disk pending paste-login for `name`. `None` if there's - /// no entry or the file is unreadable; `complete_login` (next slice) - /// is the intended consumer and will distinguish the cases via the - /// `auth::load_pending_login` error message. - #[allow(dead_code)] // first prod caller is complete_login in next slice - pub async fn pending_paste_login(&self, name: &str) -> Option { - load_pending_login(&self.auth_path, &pending_key(name)).ok() - } - /// Lazy-connect the named server (ADR §5.7). Idempotent if already - /// `Connected` with a live client. HTTP servers with an `oauth:` block - /// are routed through `mcp login` first — `connect` marks them - /// `NeedsAuth` and returns an error pointing the caller at the login - /// subcommand rather than attempting an unauthenticated dial. + /// `Connected` with a live client. HTTP servers requiring OAuth are + /// rejected until the Phase 2 auth slice lands (ADR §6). 
pub async fn connect(&self, name: &str) -> Result<()> { let dial = { let mut guard = self.handles.write().await; @@ -271,16 +163,17 @@ impl McpRuntimeManager { ServerConfig::Stdio { command, args, env, .. } => Dial::Stdio { command, args, env }, - // Oauth-protected servers can't be dialed via plain connect; - // mark `NeedsAuth` so `mcp status` shows a persistent - // "waiting for login" signal (vs `Disconnected`, which - // implies a plain `connect` would succeed). The `Failed` - // path remains reserved for dials that were attempted and - // failed at handshake. - ServerConfig::Http { oauth: Some(_), .. } => { - handle.status = ServerStatus::NeedsAuth; + // Reject oauth-protected servers BEFORE the `Connecting` + // transition: we never attempted a handshake, so leaving + // status at `Disconnected` is the honest state. Status + // becomes `Failed` only when a dial was actually tried. + ServerConfig::Http { + oauth: Some(_), + url, + .. + } => { return Err(anyhow!( - "mcp server {name:?} needs oauth login — run `mcp login {name}`" + "oauth-protected http server {url:?} requires the auth slice (Phase 2 §6)" )); } ServerConfig::Http { url, .. } => Dial::Http { url }, @@ -316,20 +209,6 @@ impl McpRuntimeManager { } } -/// Stringified provider name for the pending-state record. `Builtin` keeps -/// its `&'static str` static; `Custom` already owns a `String`. -fn provider_name_of(provider: &ResolvedProvider) -> String { - match provider { - ResolvedProvider::Builtin { provider_name, .. } => (*provider_name).to_string(), - ResolvedProvider::Custom { provider_name, .. } => provider_name.clone(), - } -} - -/// `auth.json` key for an in-flight paste-login (ADR §6.4 namespace). -fn pending_key(name: &str) -> String { - format!("mcp-pending:{name}") -} - /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. 
@@ -419,7 +298,7 @@ mod tests { } #[tokio::test] - async fn connect_http_with_oauth_marks_needs_auth() { + async fn connect_http_with_oauth_defers_to_auth_slice() { let json = r#"{ "mcpServers": { "linear": { @@ -432,33 +311,10 @@ mod tests { let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); - assert!(err.contains("needs oauth login"), "expected hint in {err}"); - assert!( - err.contains("mcp login"), - "expected 'mcp login' hint in {err}" - ); - assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); - } - - #[tokio::test] - async fn connect_oauth_twice_keeps_needs_auth_sticky() { - // Second connect() must NOT silently re-enter `Connecting` and - // shadow the user-actionable state — the only path out of - // `NeedsAuth` is a successful `mcp login`. - let json = r#"{ - "mcpServers": { - "linear": { - "type": "http", - "url": "https://mcp.linear.app/mcp", - "oauth": { "provider": "linear" } - } - } - }"#; - let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); - assert!(mgr.connect("linear").await.is_err()); - assert!(mgr.connect("linear").await.is_err()); - assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + assert!(err.contains("oauth"), "expected 'oauth' in {err}"); + // OAuth rejection happens BEFORE the Connecting transition, so the + // server remains Disconnected — no dial was attempted. + assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); } #[tokio::test] @@ -481,144 +337,6 @@ mod tests { } } - // start_paste_login + builtin_client_id race on the same env var. - // Same fix as oauth.rs / acp.rs (Tick 24 lesson). 
- static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); - - fn linear_custom_cfg() -> &'static str { - r#"{ - "mcpServers": { - "linear": { - "type": "http", - "url": "https://mcp.linear.app/mcp", - "oauth": { - "provider": "linear", - "authorize_url": "https://linear.app/oauth/authorize", - "token_url": "https://api.linear.app/oauth/token", - "client_id": "linear-client", - "scopes": ["read"] - } - } - } - }"# - } - - fn anthropic_builtin_cfg() -> &'static str { - r#"{ - "mcpServers": { - "anthro": { - "type": "http", - "url": "https://example.com/mcp", - "oauth": { "provider": "anthropic-mcp" } - } - } - }"# - } - - async fn start_login_err(mgr: &McpRuntimeManager, name: &str) -> String { - mgr.start_paste_login(name).await.unwrap_err().to_string() - } - - fn mgr_with_tempdir(cfg: McpConfig) -> (McpRuntimeManager, tempfile::TempDir) { - let dir = tempfile::tempdir().unwrap(); - let mgr = McpRuntimeManager::from_config_with_auth_path(cfg, dir.path().join("auth.json")); - (mgr, dir) - } - - #[tokio::test] - async fn start_paste_login_builtin_returns_authorize_url_and_pins_pending() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // SAFETY: serialized via ENV_LOCK; isolated env key. 
- unsafe { - std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-cid"); - } - let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); - let (mgr, _dir) = mgr_with_tempdir(cfg); - let start = mgr.start_paste_login("anthro").await.unwrap(); - assert!(start - .authorize_url - .starts_with("https://claude.ai/oauth/authorize?")); - assert!(start.authorize_url.contains("client_id=anth-cid")); - assert!(start - .authorize_url - .contains(&format!("state={}", start.state))); - let pending = mgr.pending_paste_login("anthro").await.unwrap(); - assert_eq!(pending.state, start.state); - assert!(!pending.verifier.is_empty()); - assert_eq!( - pending.token_url, - "https://platform.claude.com/v1/oauth/token" - ); - assert_eq!(pending.provider_name, "anthropic-mcp"); - assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); - unsafe { - std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); - } - } - - #[tokio::test] - async fn start_paste_login_rejects_custom_provider_for_now() { - let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); - let (mgr, _dir) = mgr_with_tempdir(cfg); - let err = start_login_err(&mgr, "linear").await; - assert!(err.contains("custom-provider"), "got: {err}"); - assert!(mgr.pending_paste_login("linear").await.is_none()); - } - - #[tokio::test] - async fn start_paste_login_rejects_custom_with_device_endpoint() { - let json = r#"{ - "mcpServers": { - "dev": { - "type": "http", - "url": "https://example.com/mcp", - "oauth": { - "provider": "dev", - "authorize_url": "https://example.com/oauth/authorize", - "token_url": "https://example.com/oauth/token", - "device_authorization_endpoint": "https://example.com/oauth/device" - } - } - } - }"#; - let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); - let err = start_login_err(&mgr, "dev").await; - assert!(err.contains("device flow"), "got: {err}"); - } - - #[tokio::test] - async fn 
start_paste_login_rejects_stdio_server() { - let json = r#"{ - "mcpServers": { - "fs": { "type": "stdio", "command": "mcp-server-filesystem" } - } - }"#; - let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); - let err = start_login_err(&mgr, "fs").await; - assert!(err.contains("stdio"), "got: {err}"); - } - - #[tokio::test] - async fn start_paste_login_unknown_server_errors() { - let mgr = McpRuntimeManager::from_config(McpConfig::default()); - let err = start_login_err(&mgr, "ghost").await; - assert!(err.contains("ghost"), "got: {err}"); - } - - #[tokio::test] - async fn start_paste_login_builtin_without_env_var_errors_loud() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - unsafe { - std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); - } - let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); - let err = start_login_err(&mgr, "anthro").await; - assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); - } - #[tokio::test] async fn connect_to_missing_binary_records_failed() { let json = r#"{ From e90ef01a78dc952d46697869dd030509d2f67b5a Mon Sep 17 00:00:00 2001 From: shaun-agent Date: Mon, 1 Jun 2026 09:24:29 +0000 Subject: [PATCH 52/98] fix(openab-agent/mcp): harden env handling --- openab-agent/Cargo.lock | 704 ++++++++++++++++++++++++++++-- openab-agent/Cargo.toml | 1 + openab-agent/src/acp.rs | 44 +- openab-agent/src/auth.rs | 11 +- openab-agent/src/mcp/config.rs | 13 +- openab-agent/src/mcp/meta_tool.rs | 5 +- openab-agent/src/mcp/runtime.rs | 59 ++- 7 files changed, 775 insertions(+), 62 deletions(-) diff --git a/openab-agent/Cargo.lock b/openab-agent/Cargo.lock index 5f878017f..42ed19750 100644 --- a/openab-agent/Cargo.lock +++ b/openab-agent/Cargo.lock @@ -11,6 +11,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "1.0.0" @@ -67,12 +76,29 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + [[package]] name = "base64" version = "0.22.1" @@ -85,6 +111,24 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + [[package]] name = "bumpalo" version = "3.20.3" @@ -99,9 +143,9 @@ checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" -version = "1.2.62" +version = "1.2.63" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" dependencies = [ "find-msvc-tools", "shlex", @@ -119,6 +163,20 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + [[package]] name = "clap" version = "4.6.1" @@ -165,11 +223,81 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "crypto-common" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer 0.10.4", + "crypto-common 0.1.7", +] + +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.0", + "const-oid", + "crypto-common 0.2.2", +] + [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", "quote", @@ -219,6 +347,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.32" @@ -226,6 +369,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -234,6 +378,40 @@ version = "0.3.32" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + [[package]] name = "futures-task" version = "0.3.32" @@ -246,12 +424,27 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -352,11 +545,20 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = 
"hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", +] + [[package]] name = "hyper" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" dependencies = [ "atomic-waker", "bytes", @@ -411,6 +613,30 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.2.0" @@ -538,6 +764,25 @@ version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" +[[package]] +name = "is-docker" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928bae27f42bc99b60d9ac7334e3a21d10ad8f1835a4e12ec3ec0464765ed1b3" +dependencies = [ + "once_cell", +] + +[[package]] +name = "is-wsl" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "173609498df190136aa7dea1a91db051746d339e18476eed5ca40521f02d7aa5" +dependencies = [ + "is-docker", + "once_cell", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -624,21 
+869,33 @@ dependencies = [ [[package]] name = "memchr" -version = "2.8.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" [[package]] name = "mio" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "wasi", "windows-sys 0.61.2", ] +[[package]] +name = "nix" +version = "0.31.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -648,6 +905,34 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "oauth2" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51e219e79014df21a225b1860a479e2dcd7cbd9130f4defd4bd0e191ea31d67d" +dependencies = [ + "base64", + "chrono", + "getrandom 0.2.17", + "http", + "rand 0.8.6", + "serde", + "serde_json", + "serde_path_to_error", + "sha2 0.10.9", + "thiserror 1.0.69", + "url", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -660,20 +945,39 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "open" +version = "5.3.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fbaa89d2ddc8473c78a3adf69eea8cffa28c483b8e02a971ef31527cd0fc92c" +dependencies = [ + "is-wsl", + "libc", + "pathdiff", +] + [[package]] name = "openab-agent" version = "0.1.0" dependencies = [ "anyhow", + "base64", "clap", + "getrandom 0.4.2", "libc", - "reqwest", + "open", + "reqwest 0.12.28", + "rmcp", "serde", "serde_json", + "sha2 0.11.0", + "temp-env", "tempfile", "tokio", "tracing", "tracing-subscriber", + "url", + "urlencoding", "uuid", ] @@ -700,6 +1004,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pathdiff" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" + [[package]] name = "percent-encoding" version = "2.3.2" @@ -749,6 +1059,20 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "process-wrap" +version = "9.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e842efad9119158434d193c6682e2ebee4b44d6ad801d7b349623b3f57cdf55" +dependencies = [ + "futures", + "indexmap", + "nix", + "tokio", + "tracing", + "windows", +] + [[package]] name = "quinn" version = "0.11.9" @@ -763,7 +1087,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -778,13 +1102,13 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand", + "rand 0.9.4", "ring", "rustc-hash", "rustls", "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -825,14 +1149,35 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +[[package]] +name = "rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies 
= [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ - "rand_chacha", - "rand_core", + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", ] [[package]] @@ -842,7 +1187,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] @@ -918,6 +1272,40 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "reqwest" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "sync_wrapper", + "tokio", + "tokio-util", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", +] + [[package]] name = "ring" version = "0.17.14" @@ -932,6 +1320,31 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmcp" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "0810a9f717d9828f475fe1f629f4c305c8464b7f496c3a854b58d29e65f4058e" +dependencies = [ + "async-trait", + "chrono", + "futures", + "http", + "oauth2", + "pin-project-lite", + "process-wrap", + "reqwest 0.13.4", + "serde", + "serde_json", + "sse-stream", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "url", +] + [[package]] name = "rustc-hash" version = "2.1.2" @@ -1053,6 +1466,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -1065,6 +1489,28 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1076,9 +1522,9 @@ dependencies = [ [[package]] name = "shlex" -version = "1.3.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" [[package]] name = "signal-hook-registry" @@ -1104,14 +1550,27 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.3" +version = "0.6.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", "windows-sys 0.61.2", ] +[[package]] +name = "sse-stream" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3962b63f038885f15bce2c6e02c0e7925c072f1ac86bb60fd44c5c6b762fb72" +dependencies = [ + "bytes", + "futures-util", + "http-body", + "http-body-util", + "pin-project-lite", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -1161,6 +1620,15 @@ dependencies = [ "syn", ] +[[package]] +name = "temp-env" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" +dependencies = [ + "parking_lot", +] + [[package]] name = "tempfile" version = "3.27.0" @@ -1174,13 +1642,33 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -1266,6 +1754,30 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "tower" version = "0.5.3" @@ -1378,6 +1890,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typenum" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -1406,8 +1924,15 @@ dependencies = [ "idna", "percent-encoding", "serde", + "serde_derive", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -1422,9 +1947,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.1" +version = "1.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -1437,6 +1962,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "version_check" +version = "0.9.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "want" version = "0.3.1" @@ -1547,6 +2078,19 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wasm-streams" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.244.0" @@ -1588,12 +2132,107 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "windows" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core", + "windows-link", + "windows-threading", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core", + "windows-link", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -1654,6 +2293,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -1875,18 +2523,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" dependencies = [ "proc-macro2", "quote", diff --git a/openab-agent/Cargo.toml b/openab-agent/Cargo.toml index 72edda354..5b091a47e 100644 --- a/openab-agent/Cargo.toml +++ b/openab-agent/Cargo.toml @@ -37,3 +37,4 @@ mcp = ["dep:rmcp"] [dev-dependencies] tempfile = "3" +temp-env = "0.3.6" diff --git a/openab-agent/src/acp.rs b/openab-agent/src/acp.rs index 5d7f4c412..9585612b2 100644 --- a/openab-agent/src/acp.rs +++ b/openab-agent/src/acp.rs @@ -256,13 +256,6 @@ impl AcpServer { #[cfg(test)] mod tests { use super::*; - use std::sync::Mutex; - - /// Serializes tests that mutate process-global env vars (notably - /// `ANTHROPIC_API_KEY`). Without this, `test_session_new` and - /// `test_session_new_missing_key` race on the same key when run in - /// parallel — set/remove from one thread is observed by the other. 
- static ENV_LOCK: Mutex<()> = Mutex::new(()); #[test] fn test_initialize_response() { @@ -277,11 +270,16 @@ mod tests { #[test] fn test_session_new() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // Set a fake key so from_env() succeeds in CI - unsafe { std::env::set_var("ANTHROPIC_API_KEY", "test-key") }; - let mut server = AcpServer::new(); - let resp_str = server.handle_session_new(2); + let resp_str = temp_env::with_vars( + [ + ("ANTHROPIC_API_KEY", Some("test-key")), + ("OPENAB_AGENT_PROVIDER", None), + ], + || { + let mut server = AcpServer::new(); + server.handle_session_new(2) + }, + ); let resp: Value = serde_json::from_str(&resp_str).unwrap(); assert_eq!(resp["jsonrpc"], "2.0"); assert_eq!(resp["id"], 2); @@ -290,15 +288,19 @@ mod tests { #[test] fn test_session_new_missing_key() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // Ensure no OAuth token exists either - let auth_path = - std::path::PathBuf::from(std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string())) - .join(".openab/agent/auth.json"); - let _ = std::fs::remove_file(&auth_path); - unsafe { std::env::remove_var("ANTHROPIC_API_KEY") }; - let mut server = AcpServer::new(); - let resp_str = server.handle_session_new(3); + let tmp = tempfile::TempDir::new().unwrap(); + let home = tmp.path().to_string_lossy().to_string(); + let resp_str = temp_env::with_vars( + [ + ("ANTHROPIC_API_KEY", None), + ("OPENAB_AGENT_PROVIDER", None), + ("HOME", Some(home.as_str())), + ], + || { + let mut server = AcpServer::new(); + server.handle_session_new(3) + }, + ); let resp: Value = serde_json::from_str(&resp_str).unwrap(); assert!(resp["error"].is_object()); assert!(resp["error"]["message"] diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 385ccede9..f34e681aa 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -517,15 +517,16 @@ mod tests { #[test] fn test_codex_client_id_default() { - unsafe { 
std::env::remove_var("OPENAB_AGENT_OAUTH_CLIENT_ID") }; - assert_eq!(codex_client_id(), "app_EMoamEEZ73f0CkXaXp7hrann"); + temp_env::with_var("OPENAB_AGENT_OAUTH_CLIENT_ID", None::<&str>, || { + assert_eq!(codex_client_id(), "app_EMoamEEZ73f0CkXaXp7hrann"); + }); } #[test] fn test_codex_client_id_override() { - unsafe { std::env::set_var("OPENAB_AGENT_OAUTH_CLIENT_ID", "custom_id") }; - assert_eq!(codex_client_id(), "custom_id"); - unsafe { std::env::remove_var("OPENAB_AGENT_OAUTH_CLIENT_ID") }; + temp_env::with_var("OPENAB_AGENT_OAUTH_CLIENT_ID", Some("custom_id"), || { + assert_eq!(codex_client_id(), "custom_id"); + }); } #[test] diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 742459430..89734917b 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -105,8 +105,12 @@ impl ServerConfig { /// callers should skip the server and continue (ADR §5.6 "per-server /// failure isolated"). `name` is the server name used in error context. pub fn resolved(&self, name: &str) -> Result { + self.resolved_with_env(name, &std::env::vars().collect()) + } + + fn resolved_with_env(&self, name: &str, env: &HashMap) -> Result { let json = serde_json::to_value(self)?; - let resolved = interpolate_value(json, &std::env::vars().collect()) + let resolved = interpolate_value(json, env) .with_context(|| format!("resolve env for mcp server {name:?}"))?; Ok(serde_json::from_value(resolved)?) } @@ -243,17 +247,14 @@ mod tests { #[test] fn resolved_substitutes_env_in_args() { - // SAFETY: single-threaded test; isolated env key. 
- unsafe { - std::env::set_var("MCP_TEST_TOKEN", "secret123"); - } + let env = env(&[("MCP_TEST_TOKEN", "secret123")]); let cfg = ServerConfig::Stdio { command: "github-mcp-server".into(), args: vec!["--token".into(), "${env:MCP_TEST_TOKEN}".into()], env: HashMap::new(), tool_filter: None, }; - match cfg.resolved("github").unwrap() { + match cfg.resolved_with_env("github", &env).unwrap() { ServerConfig::Stdio { args, .. } => { assert_eq!(args[1], "secret123"); } diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 557badf4c..22aafc639 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -149,7 +149,10 @@ async fn status(manager: &McpRuntimeManager, filter: Option<&str>) -> Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot .into_iter() - .filter(|(name, _, _)| filter.is_none_or(|f| f == name.as_str())) + .filter(|(name, _, _)| match filter { + Some(f) => f == name.as_str(), + None => true, + }) .map(|(name, status, transport)| { let last_error = match &status { ServerStatus::Failed(msg) => Some(msg.clone()), diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 933fd23d5..38d03747f 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -228,8 +228,9 @@ impl Dial { match self { Dial::Stdio { command, args, env } => { let cmd = Command::new(&command).configure(|c| { + c.env_clear(); + c.envs(stdio_child_env(&env)); c.args(&args); - c.envs(&env); }); let transport = TokioChildProcess::new(cmd) .with_context(|| format!("spawn mcp child process {command:?}"))?; @@ -247,6 +248,45 @@ impl Dial { } } +fn stdio_child_env(explicit: &HashMap) -> HashMap { + let mut env = baseline_child_env(); + env.extend(explicit.clone()); + env +} + +fn baseline_child_env() -> HashMap { + let mut env = HashMap::new(); + for key in baseline_env_keys() { + if let Ok(val) = std::env::var(key) { + env.insert((*key).to_string(), val); + 
} + } + env +} + +#[cfg(unix)] +fn baseline_env_keys() -> &'static [&'static str] { + &["HOME", "PATH", "TERM", "USER"] +} + +#[cfg(windows)] +fn baseline_env_keys() -> &'static [&'static str] { + &[ + "HOME", + "PATH", + "TERM", + "USERPROFILE", + "USERNAME", + "SystemRoot", + "SystemDrive", + ] +} + +#[cfg(not(any(unix, windows)))] +fn baseline_env_keys() -> &'static [&'static str] { + &["HOME", "PATH", "TERM"] +} + #[cfg(test)] mod tests { use super::*; @@ -356,4 +396,21 @@ mod tests { other => panic!("expected Failed, got {other:?}"), } } + + #[test] + fn stdio_child_env_keeps_only_baseline_plus_explicit() { + let mut explicit = HashMap::new(); + explicit.insert("MCP_TOKEN".to_string(), "server-token".to_string()); + explicit.insert("PATH".to_string(), "/custom/bin".to_string()); + + let env = stdio_child_env(&explicit); + + assert_eq!( + env.get("MCP_TOKEN").map(String::as_str), + Some("server-token") + ); + assert_eq!(env.get("PATH").map(String::as_str), Some("/custom/bin")); + assert!(!env.contains_key("DISCORD_BOT_TOKEN")); + assert!(!env.contains_key("ANTHROPIC_API_KEY")); + } } From 0c0b67132e8c2619ad3108578245b8aba81d63de Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:25:42 +0000 Subject: [PATCH 53/98] =?UTF-8?q?feat(openab-agent/auth):=20namespaced=20T?= =?UTF-8?q?okenStore=20+=20fsync=20(ADR=20=C2=A76.1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 §6.1 foundation. `auth.json` switches from a bare `TokenStore` to `HashMap` so MCP server credentials can sit alongside the existing Codex slot (`mcp:` vs `codex`). Legacy single-tenant files migrate transparently on read (discriminated by the top-level `access_token` key); the on-disk shape rewrites to the new layout on the next save. Codex login flows keep their existing public API — `load_tokens`/`save_tokens` now route through the codex namespace internally. 
`fsync(2)` lands on every write per the refresh-token rotation race contract: without it, a Spot interruption between local write and S3 sync could restore a revoked refresh token from durable storage. Public helpers for the MCP path (`load_/save_/remove_namespaced_token`) are feature-gated and `#[allow(dead_code)]` until the `mcp/oauth.rs` slice lands. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 173 +++++++++++++++++++++++++++++++++++---- 1 file changed, 158 insertions(+), 15 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index f34e681aa..869a8e6af 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -2,11 +2,16 @@ use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; +use std::collections::HashMap; use std::io::{BufRead, Write}; use std::net::TcpListener; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; +/// Namespace key for the existing Codex single-tenant credential. +/// Lives next to future `mcp:` entries inside `auth.json`. +const CODEX_NAMESPACE: &str = "codex"; + const REFRESH_SKEW_SECONDS: u64 = 120; const CODEX_AUTHORIZE_URL: &str = "https://auth.openai.com/oauth/authorize"; @@ -42,23 +47,36 @@ fn auth_path() -> PathBuf { .join("auth.json") } -pub fn load_tokens() -> Result { - let path = auth_path(); - let data = std::fs::read_to_string(&path).map_err(|_| { - anyhow!( - "No credentials found at {}. Run `openab-agent auth codex-oauth` first.", - path.display() - ) - })?; - serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}")) +/// Read the `auth.json` map, transparently migrating a legacy single-tenant +/// Codex token file into the new namespaced shape. The migrated map is held +/// in-memory only; the file is rewritten in the new shape on the next save. 
+/// +/// Discriminates by the top-level `access_token` key — present means the +/// file is the legacy `TokenStore` shape, absent means the new namespaced +/// map. A single JSON parse gives accurate error context either way. +fn read_auth_file(path: &Path) -> Result> { + let data = std::fs::read_to_string(path)?; + let value: serde_json::Value = + serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}"))?; + if value.get("access_token").is_some() { + let legacy: TokenStore = serde_json::from_value(value) + .map_err(|e| anyhow!("Invalid auth.json (legacy format): {e}"))?; + let mut map = HashMap::new(); + map.insert(CODEX_NAMESPACE.to_string(), legacy); + return Ok(map); + } + serde_json::from_value(value).map_err(|e| anyhow!("Invalid auth.json: {e}")) } -fn save_tokens(store: &TokenStore) -> Result<()> { - let path = auth_path(); +/// Atomically replace `auth.json` with the new map. `fsync(2)` after write +/// satisfies the ADR §6.1 refresh-token rotation contract — without it, a +/// Spot interruption between local write and S3 sync would restore a +/// revoked refresh token from durable storage on the next task start. +fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { if let Some(dir) = path.parent() { std::fs::create_dir_all(dir)?; } - let data = serde_json::to_string_pretty(store)?; + let data = serde_json::to_string_pretty(map)?; #[cfg(unix)] { use std::fs::OpenOptions; @@ -69,16 +87,87 @@ fn save_tokens(store: &TokenStore) -> Result<()> { .create(true) .truncate(true) .mode(0o600) - .open(&path)?; + .open(path)?; file.write_all(data.as_bytes())?; + file.sync_all()?; } #[cfg(not(unix))] { - std::fs::write(&path, &data)?; + std::fs::write(path, &data)?; } Ok(()) } +pub fn load_tokens() -> Result { + let path = auth_path(); + let map = read_auth_file(&path).map_err(|_| { + anyhow!( + "No credentials found at {}. 
Run `openab-agent auth codex-oauth` first.", + path.display() + ) + })?; + map.get(CODEX_NAMESPACE).cloned().ok_or_else(|| { + anyhow!( + "No codex credentials in {}. Run `openab-agent auth codex-oauth` first.", + path.display() + ) + }) +} + +fn save_tokens(store: &TokenStore) -> Result<()> { + let path = auth_path(); + let mut map = read_auth_file(&path).unwrap_or_default(); + map.insert(CODEX_NAMESPACE.to_string(), store.clone()); + write_auth_file(&path, &map) +} + +/// Look up the credential at `key` (e.g. `mcp:linear`). Returns the codex +/// entry for `key = "codex"`, but prefer `load_tokens()` for that path — +/// this helper exists for MCP server-namespaced lookups (ADR §6.1). +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) +pub fn load_namespaced_token(key: &str) -> Result { + let path = auth_path(); + let map = read_auth_file(&path) + .map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + map.get(key) + .cloned() + .ok_or_else(|| anyhow!("no credentials stored for {key:?}")) +} + +/// Insert or replace the credential at `key`, preserving all other entries. +/// Read-modify-write on a single file: callers in the same process must +/// serialize themselves (the lifecycle manager already does per ADR §5.7). +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) +pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { + let path = auth_path(); + let mut map = read_auth_file(&path).unwrap_or_default(); + map.insert(key.to_string(), store.clone()); + write_auth_file(&path, &map) +} + +/// Remove the credential at `key`. Idempotent — missing key is not an +/// error. If the map becomes empty, the file is deleted so `mcp doctor` +/// can report "no credentials" instead of "empty file". 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp logout / revoked-refresh recovery) +pub fn remove_namespaced_token(key: &str) -> Result<()> { + let path = auth_path(); + let mut map = match read_auth_file(&path) { + Ok(m) => m, + Err(_) => return Ok(()), + }; + if map.remove(key).is_none() { + return Ok(()); + } + if map.is_empty() { + let _ = std::fs::remove_file(&path); + return Ok(()); + } + write_auth_file(&path, &map) +} + fn is_expired(store: &TokenStore) -> bool { let now = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -536,4 +625,58 @@ mod tests { let expected = URL_SAFE_NO_PAD.encode(Sha256::digest(verifier.as_bytes())); assert_eq!(challenge, expected); } + + #[test] + fn read_auth_file_migrates_legacy_single_tenant_format() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let legacy = serde_json::to_string_pretty(&make_store(9_999_999_999)).unwrap(); + std::fs::write(&path, legacy).unwrap(); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.len(), 1); + assert_eq!( + map.get(CODEX_NAMESPACE).unwrap().access_token, + "test_access_token_value" + ); + } + + #[test] + fn read_auth_file_parses_new_namespaced_format() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), make_store(1)); + input.insert("mcp:linear".to_string(), make_store(2)); + write_auth_file(&path, &input).unwrap(); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.len(), 2); + assert_eq!(map.get("codex").unwrap().expires_at, 1); + assert_eq!(map.get("mcp:linear").unwrap().expires_at, 2); + } + + #[test] + fn write_auth_file_round_trips_through_disk() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("mcp:github".to_string(), make_store(42)); + write_auth_file(&path, &input).unwrap(); + let raw = 
std::fs::read_to_string(&path).unwrap(); + assert!(raw.contains("mcp:github")); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.get("mcp:github").unwrap().expires_at, 42); + } + + #[cfg(unix)] + #[test] + fn write_auth_file_creates_file_with_0600_mode() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), make_store(0)); + write_auth_file(&path, &input).unwrap(); + let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777; + assert_eq!(mode, 0o600, "expected 0600, got {mode:o}"); + } } From 75fbd778943266d1da4cdffe4bcaf5c3aaea5a8a Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:31:46 +0000 Subject: [PATCH 54/98] fix(openab-agent/auth): rustfmt break-after-= for long chained let MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI cargo fmt --check rejected the chain-break form (`let map = read_auth_file(&path)\n .map_err(...)`) — for a single-method chain that fits inline at 99 chars, rustfmt prefers breaking after the `=` and keeping the chain whole. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 869a8e6af..6ffee5871 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -128,8 +128,8 @@ fn save_tokens(store: &TokenStore) -> Result<()> { #[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) pub fn load_namespaced_token(key: &str) -> Result { let path = auth_path(); - let map = read_auth_file(&path) - .map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + let map = + read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; map.get(key) .cloned() .ok_or_else(|| anyhow!("no credentials stored for {key:?}")) From 38765abb576bd48e1f4750bcf928836e1748f43e Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:46:43 +0000 Subject: [PATCH 55/98] =?UTF-8?q?feat(openab-agent/mcp):=20add=20OAuth=20p?= =?UTF-8?q?rovider=20catalog=20(ADR=20=C2=A76.2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Built-in ProviderSpec table + resolve() that hands callers a (spec, scopes) pair. Empty cfg.scopes falls back to spec defaults; non-empty replaces them entirely. Custom providers (§6.3) deferred — anything not in the built-in list errors out for now. Module-level #![allow(dead_code)] since the first prod caller is the §6.4 login flow slice; until then only the unit tests below exercise this code. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/mod.rs | 1 + openab-agent/src/mcp/oauth.rs | 139 ++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 openab-agent/src/mcp/oauth.rs diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 55f210c16..17884c9c1 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -2,6 +2,7 @@ pub mod config; pub mod meta_tool; +pub mod oauth; pub mod runtime; use serde_json::json; diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs new file mode 100644 index 000000000..e05f7d77e --- /dev/null +++ b/openab-agent/src/mcp/oauth.rs @@ -0,0 +1,139 @@ +//! OAuth provider catalog (ADR §6.2). Wiring into the rmcp Streamable HTTP +//! transport + agent-guided flows (§6.4) lands in subsequent slices; this +//! module is the data layer the login / refresh code will dispatch through. +//! +//! Scopes are stored as `&'static [&'static str]` so callers can join them +//! with the space delimiter the OAuth 2.1 spec mandates without owning a +//! `Vec`. Per-server overrides (`OAuthConfig.scopes`) replace the defaults +//! and pay for a `Vec` at the boundary. + +// The §6.4 login slice is the first prod caller — until then, every item +// here is reachable only via the unit tests below, so `cargo clippy +// --features mcp -- -D warnings` would flag them as dead. Module-scope +// allow rather than per-item once that slice lands. +#![allow(dead_code)] + +use anyhow::{anyhow, Result}; + +use super::config::OAuthConfig; + +/// Static description of a single OAuth provider — URLs + the loopback +/// redirect the §6.4 browser flow listens on. `default_scopes` is the +/// minimum set the agent will request when `oauth.scopes` is omitted +/// from the server config; per-server overrides win when present. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ProviderSpec { + pub authorize_url: &'static str, + pub token_url: &'static str, + pub callback: &'static str, + pub default_scopes: &'static [&'static str], +} + +/// Anthropic MCP (claude.ai). Scope list from ADR §6.2 — `org:create_api_key` +/// is the broadest grant; consumers should narrow via per-server overrides. +pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { + authorize_url: "https://claude.ai/oauth/authorize", + token_url: "https://platform.claude.com/v1/oauth/token", + callback: "http://localhost:53692/callback", + default_scopes: &[ + "org:create_api_key", + "user:profile", + "user:inference", + "user:sessions:claude_code", + "user:mcp_servers", + "user:file_upload", + ], +}; + +/// Look up a built-in `ProviderSpec` by config name. Returns `None` for +/// custom providers (handled by §6.3 once `OAuthConfig` grows the URL +/// fields) and for unknown names. +pub fn builtin(name: &str) -> Option { + match name { + "anthropic-mcp" => Some(ANTHROPIC_MCP), + _ => None, + } +} + +/// Resolve a server's `oauth:` block to a `ProviderSpec` plus the effective +/// scope list. `OAuthConfig::scopes`, when non-empty, replaces the spec's +/// defaults entirely — the caller never needs to merge. +/// +/// Custom providers (per ADR §6.3) require `OAuthConfig` to grow +/// `authorize_url` / `token_url` fields; until that lands, an `oauth:` +/// block without a known `provider` is an error. 
+pub fn resolve(cfg: &OAuthConfig) -> Result<(ProviderSpec, Vec)> { + let provider = cfg + .provider + .as_deref() + .ok_or_else(|| anyhow!("oauth.provider is required (custom providers land in §6.3)"))?; + let spec = builtin(provider) + .ok_or_else(|| anyhow!("unknown oauth provider {provider:?} (built-ins: anthropic-mcp)"))?; + let scopes = if cfg.scopes.is_empty() { + spec.default_scopes.iter().map(|s| s.to_string()).collect() + } else { + cfg.scopes.clone() + }; + Ok((spec, scopes)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn anthropic_mcp_spec_matches_adr_table() { + let spec = builtin("anthropic-mcp").expect("anthropic-mcp is built-in"); + assert_eq!(spec.authorize_url, "https://claude.ai/oauth/authorize"); + assert_eq!(spec.token_url, "https://platform.claude.com/v1/oauth/token"); + assert_eq!(spec.callback, "http://localhost:53692/callback"); + assert!(spec.default_scopes.contains(&"user:mcp_servers")); + } + + #[test] + fn unknown_provider_returns_none() { + assert!(builtin("does-not-exist").is_none()); + assert!(builtin("").is_none()); + } + + #[test] + fn resolve_uses_default_scopes_when_config_omits_them() { + let cfg = OAuthConfig { + provider: Some("anthropic-mcp".to_string()), + scopes: vec![], + }; + let (spec, scopes) = resolve(&cfg).unwrap(); + assert_eq!(spec, ANTHROPIC_MCP); + assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); + } + + #[test] + fn resolve_uses_config_scopes_when_provided() { + let cfg = OAuthConfig { + provider: Some("anthropic-mcp".to_string()), + scopes: vec!["user:profile".to_string(), "user:inference".to_string()], + }; + let (_, scopes) = resolve(&cfg).unwrap(); + assert_eq!(scopes, vec!["user:profile", "user:inference"]); + } + + #[test] + fn resolve_rejects_missing_provider() { + let cfg = OAuthConfig { + provider: None, + scopes: vec![], + }; + let err = resolve(&cfg).unwrap_err().to_string(); + assert!(err.contains("required"), "got: {err}"); + } + + #[test] + fn 
resolve_rejects_unknown_provider() { + let cfg = OAuthConfig { + provider: Some("github-copilot".to_string()), + scopes: vec![], + }; + let err = resolve(&cfg).unwrap_err().to_string(); + assert!(err.contains("unknown oauth provider"), "got: {err}"); + } +} From bed866e8d998f6a51209dd55f84c8c71feac262c Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:55:31 +0000 Subject: [PATCH 56/98] =?UTF-8?q?feat(openab-agent/mcp):=20OAuthConfig=20?= =?UTF-8?q?=C2=A76.3=20fields=20+=20discovery=20boot=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends OAuthConfig with authorize_url / token_url / client_id / device_authorization_endpoint / discovery / discovery_allowlist so custom OAuth 2.1 providers can be declared inline. Adds validate() that rejects discovery=true without an explicit allowlist (RFC 8414 SSRF guard, ADR §6.3 / §6.4) and hooks it into load_layered. oauth.rs tests switch to ..Default::default() so future field additions don't churn the test struct literals. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 130 ++++++++++++++++++++++++++++++++- openab-agent/src/mcp/oauth.rs | 10 +-- 2 files changed, 130 insertions(+), 10 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 89734917b..76af13ddf 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -57,15 +57,46 @@ pub struct ToolFilter { pub exclude: Vec, } -/// OAuth block. Phase 1 only parses `provider` + `scopes`; custom-provider -/// fields (§6.3: `authorize_url`, `token_url`, `device_authorization_endpoint`, -/// `discovery`, `discovery_allowlist`) land with the Phase 2 auth slice. -#[derive(Debug, Clone, Serialize, Deserialize)] +/// OAuth block. +/// +/// `provider` selects a built-in spec from `oauth::builtin()`. Setting it +/// to an unknown name + supplying `authorize_url` / `token_url` defines a +/// custom OAuth 2.1 provider (ADR §6.3). 
`discovery: true` opts into +/// RFC 8414 dynamic discovery and requires a non-empty +/// `discovery_allowlist` of domains (§6.4 SSRF guard). +#[derive(Debug, Default, Clone, Serialize, Deserialize)] pub struct OAuthConfig { #[serde(default)] pub provider: Option, #[serde(default)] pub scopes: Vec, + #[serde(default)] + pub authorize_url: Option, + #[serde(default)] + pub token_url: Option, + #[serde(default)] + pub client_id: Option, + #[serde(default)] + pub device_authorization_endpoint: Option, + #[serde(default)] + pub discovery: bool, + #[serde(default)] + pub discovery_allowlist: Vec, +} + +impl OAuthConfig { + /// Boot-time validation (ADR §6.3 / §6.4). `discovery: true` without an + /// explicit allowlist is rejected — RFC 8414 lookups in multi-tenant + /// deployments would otherwise become an SSRF vector. + pub fn validate(&self, server: &str) -> Result<()> { + if self.discovery && self.discovery_allowlist.is_empty() { + return Err(anyhow!( + "mcp server {server:?}: oauth.discovery=true requires \ + a non-empty oauth.discovery_allowlist (ADR §6.3)" + )); + } + Ok(()) + } } impl McpConfig { @@ -89,9 +120,21 @@ impl McpConfig { let layer = Self::load_file(path)?; merged.servers.extend(layer.servers); } + merged.validate()?; Ok(merged) } + /// Validate every server's `oauth` block (ADR §6.3 boot check). Returns + /// the first failure — finer-grained per-server isolation lives in §5.6. + pub fn validate(&self) -> Result<()> { + for (name, server) in &self.servers { + if let ServerConfig::Http { oauth: Some(oauth), .. 
} = server { + oauth.validate(name)?; + } + } + Ok(()) + } + fn load_file(path: &Path) -> Result { let raw = std::fs::read_to_string(path) .with_context(|| format!("read mcp config {}", path.display()))?; @@ -288,4 +331,83 @@ mod tests { _ => unreachable!(), } } + + #[test] + fn parses_custom_oauth_provider_fields() { + let json = r#"{ + "mcpServers": { + "custom": { + "type": "http", + "url": "https://example.com/mcp", + "oauth": { + "provider": "custom", + "authorize_url": "https://example.com/oauth/authorize", + "token_url": "https://example.com/oauth/token", + "client_id": "abc123", + "device_authorization_endpoint": "https://example.com/oauth/device", + "discovery": true, + "discovery_allowlist": ["*.example.com"] + } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let ServerConfig::Http { oauth: Some(oauth), .. } = cfg.servers.get("custom").unwrap() + else { + panic!("expected http with oauth"); + }; + assert_eq!( + oauth.authorize_url.as_deref(), + Some("https://example.com/oauth/authorize"), + ); + assert_eq!( + oauth.token_url.as_deref(), + Some("https://example.com/oauth/token"), + ); + assert_eq!(oauth.client_id.as_deref(), Some("abc123")); + assert_eq!( + oauth.device_authorization_endpoint.as_deref(), + Some("https://example.com/oauth/device"), + ); + assert!(oauth.discovery); + assert_eq!(oauth.discovery_allowlist, vec!["*.example.com".to_string()]); + } + + #[test] + fn validate_rejects_discovery_without_allowlist() { + let oauth = OAuthConfig { + provider: Some("custom".into()), + discovery: true, + ..Default::default() + }; + let err = oauth.validate("srv").unwrap_err().to_string(); + assert!(err.contains("discovery_allowlist"), "got: {err}"); + assert!(err.contains("srv"), "got: {err}"); + } + + #[test] + fn validate_accepts_discovery_with_allowlist() { + let oauth = OAuthConfig { + provider: Some("custom".into()), + discovery: true, + discovery_allowlist: vec!["*.example.com".into()], + ..Default::default() + }; + 
oauth.validate("srv").unwrap(); + } + + #[test] + fn load_layered_rejects_invalid_discovery_config() { + let dir = tempfile::tempdir().unwrap(); + let project = dir.path().join("project.json"); + std::fs::write( + &project, + r#"{"mcpServers":{"bad":{"type":"http","url":"https://example.com","oauth":{"provider":"custom","discovery":true}}}}"#, + ) + .unwrap(); + let err = McpConfig::load_layered(None, Some(&project)) + .unwrap_err() + .to_string(); + assert!(err.contains("discovery_allowlist"), "got: {err}"); + } } diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index e05f7d77e..e31d9807b 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -100,7 +100,7 @@ mod tests { fn resolve_uses_default_scopes_when_config_omits_them() { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), - scopes: vec![], + ..Default::default() }; let (spec, scopes) = resolve(&cfg).unwrap(); assert_eq!(spec, ANTHROPIC_MCP); @@ -112,6 +112,7 @@ mod tests { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), scopes: vec!["user:profile".to_string(), "user:inference".to_string()], + ..Default::default() }; let (_, scopes) = resolve(&cfg).unwrap(); assert_eq!(scopes, vec!["user:profile", "user:inference"]); @@ -119,10 +120,7 @@ mod tests { #[test] fn resolve_rejects_missing_provider() { - let cfg = OAuthConfig { - provider: None, - scopes: vec![], - }; + let cfg = OAuthConfig::default(); let err = resolve(&cfg).unwrap_err().to_string(); assert!(err.contains("required"), "got: {err}"); } @@ -131,7 +129,7 @@ mod tests { fn resolve_rejects_unknown_provider() { let cfg = OAuthConfig { provider: Some("github-copilot".to_string()), - scopes: vec![], + ..Default::default() }; let err = resolve(&cfg).unwrap_err().to_string(); assert!(err.contains("unknown oauth provider"), "got: {err}"); From d901a3210ebc12e5b4e3e3ed541180852bab2739 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:10:55 +0000 
Subject: [PATCH 57/98] fix(openab-agent/mcp): rustfmt struct-pattern nested-call multi-line Burned Tick 30: `if let ServerConfig::Http { oauth: Some(oauth), .. } = server` at 74 chars fmt-rejects because the nested `Some(oauth)` binding forces multi-line struct-pattern formatting regardless of total line width. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 76af13ddf..db18a9779 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -128,7 +128,10 @@ impl McpConfig { /// the first failure — finer-grained per-server isolation lives in §5.6. pub fn validate(&self) -> Result<()> { for (name, server) in &self.servers { - if let ServerConfig::Http { oauth: Some(oauth), .. } = server { + if let ServerConfig::Http { + oauth: Some(oauth), .. + } = server + { oauth.validate(name)?; } } @@ -352,7 +355,9 @@ mod tests { } }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let ServerConfig::Http { oauth: Some(oauth), .. } = cfg.servers.get("custom").unwrap() + let ServerConfig::Http { + oauth: Some(oauth), .. + } = cfg.servers.get("custom").unwrap() else { panic!("expected http with oauth"); }; From bb6e236edcf9991ece40cda5298477d1fbc70894 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:23:57 +0000 Subject: [PATCH 58/98] =?UTF-8?q?feat(openab-agent/mcp):=20resolve=20custo?= =?UTF-8?q?m=20OAuth=20providers=20(ADR=20=C2=A76.3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolve() now returns ResolvedProvider (owned strings) instead of (ProviderSpec, Vec). Built-in providers fill it from their static spec; unknown providers fall through to the custom path, which requires authorize_url + token_url and propagates client_id / device_authorization_endpoint when supplied. 
callback is None for custom (§6.4 picks the port at login time). Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/oauth.rs | 167 ++++++++++++++++++++++++++-------- 1 file changed, 131 insertions(+), 36 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index e31d9807b..52d61a36f 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -1,11 +1,7 @@ -//! OAuth provider catalog (ADR §6.2). Wiring into the rmcp Streamable HTTP -//! transport + agent-guided flows (§6.4) lands in subsequent slices; this -//! module is the data layer the login / refresh code will dispatch through. -//! -//! Scopes are stored as `&'static [&'static str]` so callers can join them -//! with the space delimiter the OAuth 2.1 spec mandates without owning a -//! `Vec`. Per-server overrides (`OAuthConfig.scopes`) replace the defaults -//! and pay for a `Vec` at the boundary. +//! OAuth provider catalog (ADR §6.2) + custom-provider resolution (§6.3). +//! Wiring into the rmcp Streamable HTTP transport + agent-guided flows +//! (§6.4) lands in subsequent slices; this module is the data layer the +//! login / refresh code will dispatch through. // The §6.4 login slice is the first prod caller — until then, every item // here is reachable only via the unit tests below, so `cargo clippy @@ -17,9 +13,8 @@ use anyhow::{anyhow, Result}; use super::config::OAuthConfig; -/// Static description of a single OAuth provider — URLs + the loopback -/// redirect the §6.4 browser flow listens on. `default_scopes` is the -/// minimum set the agent will request when `oauth.scopes` is omitted +/// Static description of a single built-in OAuth provider. `default_scopes` +/// is the minimum set the agent will request when `oauth.scopes` is omitted /// from the server config; per-server overrides win when present. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ProviderSpec { @@ -46,8 +41,7 @@ pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { }; /// Look up a built-in `ProviderSpec` by config name. Returns `None` for -/// custom providers (handled by §6.3 once `OAuthConfig` grows the URL -/// fields) and for unknown names. +/// custom providers (§6.3) and for unknown names. pub fn builtin(name: &str) -> Option { match name { "anthropic-mcp" => Some(ANTHROPIC_MCP), @@ -55,26 +49,71 @@ pub fn builtin(name: &str) -> Option { } } -/// Resolve a server's `oauth:` block to a `ProviderSpec` plus the effective -/// scope list. `OAuthConfig::scopes`, when non-empty, replaces the spec's -/// defaults entirely — the caller never needs to merge. +/// Effective per-server OAuth parameters after resolving the built-in catalog +/// and `OAuthConfig` overrides. `callback` is `None` for custom providers +/// (§6.4 picks a free port at login time); built-ins pin theirs. `client_id` +/// is `None` for built-ins (the per-provider flow code in §6.4 owns it) and +/// optional for custom providers — OAuth 2.1 servers vary on whether public +/// clients must register. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ResolvedProvider { + pub authorize_url: String, + pub token_url: String, + pub client_id: Option, + pub callback: Option, + pub device_authorization_endpoint: Option, + pub scopes: Vec, +} + +/// Resolve a server's `oauth:` block. Built-in providers come from +/// `builtin()`; unknown providers fall through to the §6.3 custom path, +/// which requires `authorize_url` + `token_url` on the config. /// -/// Custom providers (per ADR §6.3) require `OAuthConfig` to grow -/// `authorize_url` / `token_url` fields; until that lands, an `oauth:` -/// block without a known `provider` is an error. 
-pub fn resolve(cfg: &OAuthConfig) -> Result<(ProviderSpec, Vec)> { +/// `OAuthConfig::scopes`, when non-empty, replaces the spec's defaults +/// entirely — the caller never needs to merge. +pub fn resolve(cfg: &OAuthConfig) -> Result { let provider = cfg .provider .as_deref() - .ok_or_else(|| anyhow!("oauth.provider is required (custom providers land in §6.3)"))?; - let spec = builtin(provider) - .ok_or_else(|| anyhow!("unknown oauth provider {provider:?} (built-ins: anthropic-mcp)"))?; + .ok_or_else(|| anyhow!("oauth.provider is required"))?; + if let Some(spec) = builtin(provider) { + Ok(resolve_builtin(spec, cfg)) + } else { + resolve_custom(provider, cfg) + } +} + +fn resolve_builtin(spec: ProviderSpec, cfg: &OAuthConfig) -> ResolvedProvider { let scopes = if cfg.scopes.is_empty() { spec.default_scopes.iter().map(|s| s.to_string()).collect() } else { cfg.scopes.clone() }; - Ok((spec, scopes)) + ResolvedProvider { + authorize_url: spec.authorize_url.to_string(), + token_url: spec.token_url.to_string(), + client_id: None, + callback: Some(spec.callback.to_string()), + device_authorization_endpoint: None, + scopes, + } +} + +fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result { + let authorize_url = cfg.authorize_url.clone().ok_or_else(|| { + anyhow!("custom oauth provider {provider:?}: oauth.authorize_url is required (ADR §6.3)") + })?; + let token_url = cfg.token_url.clone().ok_or_else(|| { + anyhow!("custom oauth provider {provider:?}: oauth.token_url is required (ADR §6.3)") + })?; + Ok(ResolvedProvider { + authorize_url, + token_url, + client_id: cfg.client_id.clone(), + callback: None, + device_authorization_endpoint: cfg.device_authorization_endpoint.clone(), + scopes: cfg.scopes.clone(), + }) } #[cfg(test)] @@ -97,41 +136,97 @@ mod tests { } #[test] - fn resolve_uses_default_scopes_when_config_omits_them() { + fn resolve_builtin_uses_default_scopes_when_config_omits_them() { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), 
..Default::default() }; - let (spec, scopes) = resolve(&cfg).unwrap(); - assert_eq!(spec, ANTHROPIC_MCP); - assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); + let r = resolve(&cfg).unwrap(); + assert_eq!(r.authorize_url, ANTHROPIC_MCP.authorize_url); + assert_eq!(r.callback.as_deref(), Some(ANTHROPIC_MCP.callback)); + assert_eq!(r.scopes.len(), ANTHROPIC_MCP.default_scopes.len()); + assert!(r.client_id.is_none()); + assert!(r.device_authorization_endpoint.is_none()); } #[test] - fn resolve_uses_config_scopes_when_provided() { + fn resolve_builtin_uses_config_scopes_when_provided() { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), scopes: vec!["user:profile".to_string(), "user:inference".to_string()], ..Default::default() }; - let (_, scopes) = resolve(&cfg).unwrap(); - assert_eq!(scopes, vec!["user:profile", "user:inference"]); + let r = resolve(&cfg).unwrap(); + assert_eq!(r.scopes, vec!["user:profile", "user:inference"]); } #[test] fn resolve_rejects_missing_provider() { - let cfg = OAuthConfig::default(); - let err = resolve(&cfg).unwrap_err().to_string(); + let err = resolve(&OAuthConfig::default()).unwrap_err().to_string(); assert!(err.contains("required"), "got: {err}"); } #[test] - fn resolve_rejects_unknown_provider() { + fn resolve_custom_uses_config_urls_and_propagates_device_endpoint() { + let cfg = OAuthConfig { + provider: Some("linear".to_string()), + authorize_url: Some("https://linear.app/oauth/authorize".to_string()), + token_url: Some("https://api.linear.app/oauth/token".to_string()), + client_id: Some("client-abc".to_string()), + device_authorization_endpoint: Some("https://linear.app/oauth/device".to_string()), + scopes: vec!["read".to_string(), "write".to_string()], + ..Default::default() + }; + let r = resolve(&cfg).unwrap(); + assert_eq!(r.authorize_url, "https://linear.app/oauth/authorize"); + assert_eq!(r.token_url, "https://api.linear.app/oauth/token"); + assert_eq!(r.client_id.as_deref(), 
Some("client-abc")); + assert_eq!( + r.device_authorization_endpoint.as_deref(), + Some("https://linear.app/oauth/device"), + ); + assert!( + r.callback.is_none(), + "custom providers defer callback to login-time port allocation", + ); + assert_eq!(r.scopes, vec!["read", "write"]); + } + + #[test] + fn resolve_custom_minimal_two_urls_only() { + let cfg = OAuthConfig { + provider: Some("acme".to_string()), + authorize_url: Some("https://acme.example/authorize".to_string()), + token_url: Some("https://acme.example/token".to_string()), + ..Default::default() + }; + let r = resolve(&cfg).unwrap(); + assert!(r.client_id.is_none()); + assert!(r.device_authorization_endpoint.is_none()); + assert!(r.callback.is_none()); + assert!(r.scopes.is_empty()); + } + + #[test] + fn resolve_custom_rejects_missing_authorize_url() { + let cfg = OAuthConfig { + provider: Some("custom".to_string()), + token_url: Some("https://example.com/token".to_string()), + ..Default::default() + }; + let err = resolve(&cfg).unwrap_err().to_string(); + assert!(err.contains("authorize_url"), "got: {err}"); + assert!(err.contains("custom"), "got: {err}"); + } + + #[test] + fn resolve_custom_rejects_missing_token_url() { let cfg = OAuthConfig { - provider: Some("github-copilot".to_string()), + provider: Some("custom".to_string()), + authorize_url: Some("https://example.com/authorize".to_string()), ..Default::default() }; let err = resolve(&cfg).unwrap_err().to_string(); - assert!(err.contains("unknown oauth provider"), "got: {err}"); + assert!(err.contains("token_url"), "got: {err}"); } } From 8d3781418874c54852a33e282eb73674f3ba5172 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:34:14 +0000 Subject: [PATCH 59/98] refactor(openab-agent/mcp): ResolvedProvider as enum {Builtin, Custom} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Mira's Tick 32 review: encode "callback always pinned for built-ins, dynamic for custom" and "client_id 
owned by §6.4 for built-ins, from config for custom" as variants instead of convention-based Option fields. §6.4 caller gets exhaustive matching for free. ProviderSpec gains a `name: &'static str` field so the resolver can copy it into Builtin's provider_name without re-matching the catalog key. BUILTINS slice replaces the duplicated string-key match in builtin() — single source of truth. authorize_url() / token_url() / scopes() getters keep the call sites that don't care about the variant from drowning in `match`. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/oauth.rs | 157 +++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 51 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index 52d61a36f..c18ddddc6 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -18,6 +18,7 @@ use super::config::OAuthConfig; /// from the server config; per-server overrides win when present. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ProviderSpec { + pub name: &'static str, pub authorize_url: &'static str, pub token_url: &'static str, pub callback: &'static str, @@ -27,6 +28,7 @@ pub struct ProviderSpec { /// Anthropic MCP (claude.ai). Scope list from ADR §6.2 — `org:create_api_key` /// is the broadest grant; consumers should narrow via per-server overrides. pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { + name: "anthropic-mcp", authorize_url: "https://claude.ai/oauth/authorize", token_url: "https://platform.claude.com/v1/oauth/token", callback: "http://localhost:53692/callback", @@ -40,46 +42,86 @@ pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { ], }; +const BUILTINS: &[ProviderSpec] = &[ANTHROPIC_MCP]; + /// Look up a built-in `ProviderSpec` by config name. Returns `None` for /// custom providers (§6.3) and for unknown names. 
pub fn builtin(name: &str) -> Option { - match name { - "anthropic-mcp" => Some(ANTHROPIC_MCP), - _ => None, - } + BUILTINS.iter().copied().find(|spec| spec.name == name) } /// Effective per-server OAuth parameters after resolving the built-in catalog -/// and `OAuthConfig` overrides. `callback` is `None` for custom providers -/// (§6.4 picks a free port at login time); built-ins pin theirs. `client_id` -/// is `None` for built-ins (the per-provider flow code in §6.4 owns it) and -/// optional for custom providers — OAuth 2.1 servers vary on whether public -/// clients must register. +/// and `OAuthConfig` overrides. +/// +/// The two variants encode invariants that an `Option`-heavy struct couldn't: +/// built-ins always pin a `callback` (their PKCE port is hard-coded in the +/// provider's app registration) and never carry a `client_id` (the §6.4 flow +/// code owns it, mirroring `auth.rs::codex_client_id()`). Custom providers +/// flip both: §6.4 allocates a free port at login time, and `client_id` +/// comes from config (OAuth 2.1 public clients vary on registration). +/// +/// `device_authorization_endpoint` only appears on `Custom` — adding device +/// support for a built-in provider is a `ProviderSpec` schema change, not a +/// config flag. #[derive(Debug, Clone, PartialEq, Eq)] -pub struct ResolvedProvider { - pub authorize_url: String, - pub token_url: String, - pub client_id: Option, - pub callback: Option, - pub device_authorization_endpoint: Option, - pub scopes: Vec, +pub enum ResolvedProvider { + Builtin { + provider_name: &'static str, + authorize_url: &'static str, + token_url: &'static str, + callback: &'static str, + scopes: Vec, + }, + Custom { + provider_name: String, + authorize_url: String, + token_url: String, + client_id: Option, + device_authorization_endpoint: Option, + scopes: Vec, + }, +} + +impl ResolvedProvider { + /// Accessor for the shared `authorize_url` field. 
Callers that don't + /// need to distinguish built-in vs custom can skip the `match`. + pub fn authorize_url(&self) -> &str { + match self { + Self::Builtin { authorize_url, .. } => authorize_url, + Self::Custom { authorize_url, .. } => authorize_url, + } + } + + /// Accessor for the shared `token_url` field. + pub fn token_url(&self) -> &str { + match self { + Self::Builtin { token_url, .. } => token_url, + Self::Custom { token_url, .. } => token_url, + } + } + + /// Accessor for the shared scope list. + pub fn scopes(&self) -> &[String] { + match self { + Self::Builtin { scopes, .. } | Self::Custom { scopes, .. } => scopes, + } + } } /// Resolve a server's `oauth:` block. Built-in providers come from /// `builtin()`; unknown providers fall through to the §6.3 custom path, /// which requires `authorize_url` + `token_url` on the config. /// -/// `OAuthConfig::scopes`, when non-empty, replaces the spec's defaults +/// `OAuthConfig::scopes`, when non-empty, replaces the built-in defaults /// entirely — the caller never needs to merge. 
pub fn resolve(cfg: &OAuthConfig) -> Result { let provider = cfg .provider .as_deref() .ok_or_else(|| anyhow!("oauth.provider is required"))?; - if let Some(spec) = builtin(provider) { - Ok(resolve_builtin(spec, cfg)) - } else { - resolve_custom(provider, cfg) + match builtin(provider) { + Some(spec) => Ok(resolve_builtin(spec, cfg)), + None => resolve_custom(provider, cfg), } } @@ -89,12 +131,11 @@ fn resolve_builtin(spec: ProviderSpec, cfg: &OAuthConfig) -> ResolvedProvider { } else { cfg.scopes.clone() }; - ResolvedProvider { - authorize_url: spec.authorize_url.to_string(), - token_url: spec.token_url.to_string(), - client_id: None, - callback: Some(spec.callback.to_string()), - device_authorization_endpoint: None, + ResolvedProvider::Builtin { + provider_name: spec.name, + authorize_url: spec.authorize_url, + token_url: spec.token_url, + callback: spec.callback, scopes, } } @@ -106,11 +147,11 @@ fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result let token_url = cfg.token_url.clone().ok_or_else(|| { anyhow!("custom oauth provider {provider:?}: oauth.token_url is required (ADR §6.3)") })?; - Ok(ResolvedProvider { + Ok(ResolvedProvider::Custom { + provider_name: provider.to_string(), authorize_url, token_url, client_id: cfg.client_id.clone(), - callback: None, device_authorization_endpoint: cfg.device_authorization_endpoint.clone(), scopes: cfg.scopes.clone(), }) @@ -141,12 +182,15 @@ mod tests { provider: Some("anthropic-mcp".to_string()), ..Default::default() }; - let r = resolve(&cfg).unwrap(); - assert_eq!(r.authorize_url, ANTHROPIC_MCP.authorize_url); - assert_eq!(r.callback.as_deref(), Some(ANTHROPIC_MCP.callback)); - assert_eq!(r.scopes.len(), ANTHROPIC_MCP.default_scopes.len()); - assert!(r.client_id.is_none()); - assert!(r.device_authorization_endpoint.is_none()); + let ResolvedProvider::Builtin { + provider_name, callback, scopes, .. 
+ } = resolve(&cfg).unwrap() + else { + panic!("expected Builtin variant"); + }; + assert_eq!(provider_name, "anthropic-mcp"); + assert_eq!(callback, ANTHROPIC_MCP.callback); + assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); } #[test] @@ -157,7 +201,7 @@ mod tests { ..Default::default() }; let r = resolve(&cfg).unwrap(); - assert_eq!(r.scopes, vec!["user:profile", "user:inference"]); + assert_eq!(r.scopes(), &["user:profile", "user:inference"]); } #[test] @@ -177,19 +221,26 @@ mod tests { scopes: vec!["read".to_string(), "write".to_string()], ..Default::default() }; - let r = resolve(&cfg).unwrap(); - assert_eq!(r.authorize_url, "https://linear.app/oauth/authorize"); - assert_eq!(r.token_url, "https://api.linear.app/oauth/token"); - assert_eq!(r.client_id.as_deref(), Some("client-abc")); + let ResolvedProvider::Custom { + provider_name, + authorize_url, + token_url, + client_id, + device_authorization_endpoint, + scopes, + } = resolve(&cfg).unwrap() + else { + panic!("expected Custom variant"); + }; + assert_eq!(provider_name, "linear"); + assert_eq!(authorize_url, "https://linear.app/oauth/authorize"); + assert_eq!(token_url, "https://api.linear.app/oauth/token"); + assert_eq!(client_id.as_deref(), Some("client-abc")); assert_eq!( - r.device_authorization_endpoint.as_deref(), + device_authorization_endpoint.as_deref(), Some("https://linear.app/oauth/device"), ); - assert!( - r.callback.is_none(), - "custom providers defer callback to login-time port allocation", - ); - assert_eq!(r.scopes, vec!["read", "write"]); + assert_eq!(scopes, vec!["read", "write"]); } #[test] @@ -200,11 +251,15 @@ mod tests { token_url: Some("https://acme.example/token".to_string()), ..Default::default() }; - let r = resolve(&cfg).unwrap(); - assert!(r.client_id.is_none()); - assert!(r.device_authorization_endpoint.is_none()); - assert!(r.callback.is_none()); - assert!(r.scopes.is_empty()); + let ResolvedProvider::Custom { + client_id, device_authorization_endpoint, scopes, 
.. + } = resolve(&cfg).unwrap() + else { + panic!("expected Custom variant"); + }; + assert!(client_id.is_none()); + assert!(device_authorization_endpoint.is_none()); + assert!(scopes.is_empty()); } #[test] From 2c112ddcc9f6aa86f71956617527bebff74c9219 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:41:05 +0000 Subject: [PATCH 60/98] fix(openab-agent/mcp): rustfmt per-line struct-pattern binders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Burned Tick 33: rustfmt's struct-pattern formatting splits ≥3 simple binders onto their own lines, even when the body would fit inline. Different rule from the nested-call case (Tick 30) where the body stays inline. Runbook updated with both rules side-by-side. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/oauth.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index c18ddddc6..fa6cb2497 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -183,7 +183,10 @@ mod tests { ..Default::default() }; let ResolvedProvider::Builtin { - provider_name, callback, scopes, .. + provider_name, + callback, + scopes, + .. } = resolve(&cfg).unwrap() else { panic!("expected Builtin variant"); @@ -252,7 +255,10 @@ mod tests { ..Default::default() }; let ResolvedProvider::Custom { - client_id, device_authorization_endpoint, scopes, .. + client_id, + device_authorization_endpoint, + scopes, + .. 
} = resolve(&cfg).unwrap() else { panic!("expected Custom variant"); From 21aa6221f6fdaeabb4b04c963e4352e5c45901eb Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:00:28 +0000 Subject: [PATCH 61/98] =?UTF-8?q?feat(openab-agent/mcp):=20paste-back=20OA?= =?UTF-8?q?uth=20flow=20primitives=20(ADR=20=C2=A76.4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New mcp::flow module exposes init_paste_authorize(provider, client_id, redirect_uri) -> PasteAuthorize, which generates the PKCE pair + state nonce internally and returns the authorize URL plus the secrets the caller must persist for complete_login to validate the callback. Internalizing pair generation removes a footgun (caller can't mismatch verifier/state) and shrinks the API to the two parameters that actually vary per call. auth::generate_pkce promoted from private to pub so the MCP flow path can share it with Codex — security primitive, single source of truth, no drift on future hardening. Module-scope #![allow(dead_code)] consistent with mcp::oauth — first prod caller (the §6.4 login orchestration) lands in the next slice. Tests cover URL structure, percent-encoding of redirect_uri, scope form-encoding, unparseable authorize_url error path, and custom-provider URL composition. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 2 +- openab-agent/src/mcp/flow.rs | 146 +++++++++++++++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 1 + 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 openab-agent/src/mcp/flow.rs diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 6ffee5871..ff289b298 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -227,7 +227,7 @@ async fn refresh_token(store: &TokenStore) -> Result { }) } -fn generate_pkce() -> (String, String) { +pub fn generate_pkce() -> (String, String) { let mut buf = [0u8; 32]; getrandom::fill(&mut buf).expect("getrandom failed"); let verifier = URL_SAFE_NO_PAD.encode(buf); diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs new file mode 100644 index 000000000..17b34b5ce --- /dev/null +++ b/openab-agent/src/mcp/flow.rs @@ -0,0 +1,146 @@ +//! OAuth 2.1 paste-back flow primitives (ADR §6.4). PKCE comes from +//! `crate::auth::generate_pkce` — shared with the Codex paths so a +//! security-primitive change can't drift between modules. Orchestration +//! (device polling, callback parsing) lands in subsequent slices. + +// First prod caller (§6.4 login orchestration) lands in the next slice; +// until then every item is reachable only via tests, so +// `clippy --features mcp -D warnings` would flag dead_code. +#![allow(dead_code)] + +use anyhow::Result; +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; +use url::Url; + +use crate::auth::generate_pkce; +use super::oauth::ResolvedProvider; + +/// 16-byte URL-safe `state` nonce for the OAuth authorize URL. 
+fn generate_state() -> String { + let mut buf = [0u8; 16]; + getrandom::fill(&mut buf).expect("getrandom failed"); + URL_SAFE_NO_PAD.encode(buf) +} + +/// Result of `init_paste_authorize`: the URL to surface to the user, plus +/// the `code_verifier` + `state` the caller must persist under the +/// pending-login key for `complete_login` to validate the callback. +pub struct PasteAuthorize { + pub url: String, + pub code_verifier: String, + pub state: String, +} + +/// Start a paste-back OAuth 2.1 authorize flow. Generates the PKCE pair +/// and state nonce internally so the caller can't pair them up wrong; +/// builds the RFC 6749 authorize URL with `S256` PKCE and space-joined +/// scopes. `client_id` is caller-supplied: built-ins look it up via a +/// hard-coded helper (mirroring `auth::codex_client_id`); custom +/// providers carry it on `ResolvedProvider::Custom`. `redirect_uri` is +/// the provider's pinned callback for built-ins or a runtime-bound +/// `localhost:` for custom paste-back flows. 
+pub fn init_paste_authorize( + provider: &ResolvedProvider, + client_id: &str, + redirect_uri: &str, +) -> Result { + let (code_verifier, code_challenge) = generate_pkce(); + let state = generate_state(); + let mut url = Url::parse(provider.authorize_url())?; + url.query_pairs_mut() + .append_pair("response_type", "code") + .append_pair("client_id", client_id) + .append_pair("redirect_uri", redirect_uri) + .append_pair("code_challenge", &code_challenge) + .append_pair("code_challenge_method", "S256") + .append_pair("state", &state) + .append_pair("scope", &provider.scopes().join(" ")); + Ok(PasteAuthorize { + url: url.to_string(), + code_verifier, + state, + }) +} + +#[cfg(test)] +mod tests { + use crate::mcp::config::OAuthConfig; + use crate::mcp::oauth::resolve; + use super::*; + + const TEST_REDIRECT: &str = "http://localhost:53692/callback"; + + #[test] + fn state_is_url_safe_and_unique() { + let s = generate_state(); + let url_safe = s + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_'); + assert!(url_safe); + assert_ne!(s, generate_state()); + } + + fn builtin_provider() -> ResolvedProvider { + let cfg = OAuthConfig { + provider: Some("anthropic-mcp".to_string()), + ..Default::default() + }; + resolve(&cfg).unwrap() + } + + #[test] + fn init_paste_authorize_threads_pkce_and_state_into_url() { + let p = builtin_provider(); + let r = init_paste_authorize(&p, "client-xyz", TEST_REDIRECT).unwrap(); + assert!(r.url.starts_with("https://claude.ai/oauth/authorize?")); + assert!(r.url.contains("response_type=code")); + assert!(r.url.contains("client_id=client-xyz")); + assert!(r.url.contains("code_challenge_method=S256")); + assert!(r.url.contains(&format!("state={}", r.state))); + assert!(!r.code_verifier.is_empty()); + } + + #[test] + fn init_paste_authorize_percent_encodes_redirect_uri() { + let p = builtin_provider(); + let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); + let want = 
"redirect_uri=http%3A%2F%2Flocalhost%3A53692%2Fcallback"; + assert!(r.url.contains(want)); + } + + #[test] + fn init_paste_authorize_form_encodes_scope_spaces_as_plus() { + let p = builtin_provider(); + let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); + assert!(r.url.contains("scope=org%3Acreate_api_key")); + assert!(r.url.contains("user%3Amcp_servers")); + } + + #[test] + fn init_paste_authorize_rejects_unparseable_authorize_url() { + let cfg = OAuthConfig { + provider: Some("broken".to_string()), + authorize_url: Some("not a url".to_string()), + token_url: Some("https://example.com/token".to_string()), + ..Default::default() + }; + let p = resolve(&cfg).unwrap(); + assert!(init_paste_authorize(&p, "c", TEST_REDIRECT).is_err()); + } + + #[test] + fn init_paste_authorize_for_custom_provider() { + let cfg = OAuthConfig { + provider: Some("linear".to_string()), + authorize_url: Some("https://linear.app/oauth/authorize".to_string()), + token_url: Some("https://api.linear.app/oauth/token".to_string()), + client_id: Some("linear-client".to_string()), + scopes: vec!["read".to_string(), "write".to_string()], + ..Default::default() + }; + let p = resolve(&cfg).unwrap(); + let r = init_paste_authorize(&p, "linear-client", TEST_REDIRECT).unwrap(); + assert!(r.url.starts_with("https://linear.app/oauth/authorize?")); + assert!(r.url.contains("scope=read+write")); + } +} diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 17884c9c1..81278aa4e 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,6 +1,7 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. 
pub mod config; +pub mod flow; pub mod meta_tool; pub mod oauth; pub mod runtime; From 99395b6108529e2a255a75ff74fd88d84c11978e Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:11:04 +0000 Subject: [PATCH 62/98] fix(openab-agent/mcp): rustfmt import precedence super < crate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rustfmt's reorder_imports does NOT sort by pure alphabetical order across use statements — local path roots have a fixed precedence: self < super < crate < external crates. My alphabetical assumption (c < s, so crate first) was wrong in both flow.rs's module-scope imports and its mod tests block. Burned Tick 36 (twice, same file). Runbook updated alongside the Tick 14 sub-module-vs-bare ordering note since the two rules apply at different scopes. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/flow.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index 17b34b5ce..caac9d340 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -12,8 +12,8 @@ use anyhow::Result; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use url::Url; -use crate::auth::generate_pkce; use super::oauth::ResolvedProvider; +use crate::auth::generate_pkce; /// 16-byte URL-safe `state` nonce for the OAuth authorize URL. 
fn generate_state() -> String { @@ -64,9 +64,9 @@ pub fn init_paste_authorize( #[cfg(test)] mod tests { + use super::*; use crate::mcp::config::OAuthConfig; use crate::mcp::oauth::resolve; - use super::*; const TEST_REDIRECT: &str = "http://localhost:53692/callback"; From d8e29d7e3cd99e6ccde62a3ad50aa021f1394126 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:25:40 +0000 Subject: [PATCH 63/98] feat(openab-agent/mcp): NeedsAuth state for oauth-protected http servers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ServerStatus gains a NeedsAuth variant; McpRuntimeManager::connect() now transitions oauth-protected http servers into that state with an error pointing the user at `mcp login <name>` instead of staying silently Disconnected. Icon "◌" (U+25CC DOTTED CIRCLE) matches the existing geometric family (○ ◐ ●) rather than "⚠" — the latter is emoji-prone (Discord and many terminals upgrade it via VS16), which would break aligned status output. Status label "needs_auth" wired through meta_tool's snake_case status_label() table. Tests cover both the transition + error format and an idempotency guarantee: a second connect() on a NeedsAuth server must keep the state sticky (only successful `mcp login` clears it). 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 1 + openab-agent/src/mcp/runtime.rs | 58 +++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 22aafc639..6a8d6060f 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -189,6 +189,7 @@ fn status_label(status: &ServerStatus) -> &'static str { ServerStatus::Disconnected => "disconnected", ServerStatus::Connecting => "connecting", ServerStatus::Connected => "connected", + ServerStatus::NeedsAuth => "needs_auth", ServerStatus::Failed(_) => "failed", } } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 38d03747f..944160612 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -24,12 +24,12 @@ use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -#[allow(dead_code)] // NeedsAuth lands with the Phase 2 OAuth slice (ADR §5.7) #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { Disconnected, Connecting, Connected, + NeedsAuth, Failed(String), } @@ -39,6 +39,7 @@ impl ServerStatus { ServerStatus::Disconnected => "○", ServerStatus::Connecting => "◐", ServerStatus::Connected => "●", + ServerStatus::NeedsAuth => "◌", ServerStatus::Failed(_) => "✗", } } @@ -147,8 +148,10 @@ impl McpRuntimeManager { } /// Lazy-connect the named server (ADR §5.7). Idempotent if already - /// `Connected` with a live client. HTTP servers requiring OAuth are - /// rejected until the Phase 2 auth slice lands (ADR §6). + /// `Connected` with a live client. HTTP servers with an `oauth:` block + /// are routed through `mcp login` first — `connect` marks them + /// `NeedsAuth` and returns an error pointing the caller at the login + /// subcommand rather than attempting an unauthenticated dial. 
pub async fn connect(&self, name: &str) -> Result<()> { let dial = { let mut guard = self.handles.write().await; @@ -163,17 +166,16 @@ impl McpRuntimeManager { ServerConfig::Stdio { command, args, env, .. } => Dial::Stdio { command, args, env }, - // Reject oauth-protected servers BEFORE the `Connecting` - // transition: we never attempted a handshake, so leaving - // status at `Disconnected` is the honest state. Status - // becomes `Failed` only when a dial was actually tried. - ServerConfig::Http { - oauth: Some(_), - url, - .. - } => { + // Oauth-protected servers can't be dialed via plain connect; + // mark `NeedsAuth` so `mcp status` shows a persistent + // "waiting for login" signal (vs `Disconnected`, which + // implies a plain `connect` would succeed). The `Failed` + // path remains reserved for dials that were attempted and + // failed at handshake. + ServerConfig::Http { oauth: Some(_), .. } => { + handle.status = ServerStatus::NeedsAuth; return Err(anyhow!( - "oauth-protected http server {url:?} requires the auth slice (Phase 2 §6)" + "mcp server {name:?} needs oauth login — run `mcp login {name}`" )); } ServerConfig::Http { url, .. } => Dial::Http { url }, @@ -338,7 +340,7 @@ mod tests { } #[tokio::test] - async fn connect_http_with_oauth_defers_to_auth_slice() { + async fn connect_http_with_oauth_marks_needs_auth() { let json = r#"{ "mcpServers": { "linear": { @@ -351,10 +353,30 @@ mod tests { let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); - assert!(err.contains("oauth"), "expected 'oauth' in {err}"); - // OAuth rejection happens BEFORE the Connecting transition, so the - // server remains Disconnected — no dial was attempted. 
- assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); + assert!(err.contains("needs oauth login"), "expected hint in {err}"); + assert!(err.contains("mcp login"), "expected 'mcp login' hint in {err}"); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + } + + #[tokio::test] + async fn connect_oauth_twice_keeps_needs_auth_sticky() { + // Second connect() must NOT silently re-enter `Connecting` and + // shadow the user-actionable state — the only path out of + // `NeedsAuth` is a successful `mcp login`. + let json = r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + assert!(mgr.connect("linear").await.is_err()); + assert!(mgr.connect("linear").await.is_err()); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); } #[tokio::test] From 0da594c007b85cb5f25653c9e769c8ea7b77564b Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:30:36 +0000 Subject: [PATCH 64/98] style(openab-agent/mcp): split assert! args to satisfy rustfmt fn_call_width Tick 37's assert!(err.contains("mcp login"), "...") was 84 chars inline but rustfmt's default fn_call_width=60 measures the arg list and split it. Match the formatter. 
--- openab-agent/src/mcp/runtime.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 944160612..841ee8608 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -354,7 +354,10 @@ mod tests { let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); assert!(err.contains("needs oauth login"), "expected hint in {err}"); - assert!(err.contains("mcp login"), "expected 'mcp login' hint in {err}"); + assert!( + err.contains("mcp login"), + "expected 'mcp login' hint in {err}" + ); assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); } From dd2a97858536a09a7f5d61946f060361ed7ac5ce Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:49:05 +0000 Subject: [PATCH 65/98] feat(openab-agent/mcp): start_paste_login + builtin client_id resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit McpRuntimeManager::start_paste_login(server) wires flow::init_paste_authorize for built-in OAuth providers (ADR §6.4). The PKCE verifier + state are stashed in an in-memory pending_logins map (HashMap) for the next slice's complete_login to consume. Server status flips to NeedsAuth. Scope this slice: - Built-in providers only (anthropic-mcp). Custom-provider paste-back needs runtime callback port allocation; deferred to a follow-up slice. - Custom providers declaring device_authorization_endpoint short-circuit with an explicit "use device flow" error (ADR §6.4 selection logic). - ADR §6.4 says transient state lives "in TokenStore"; this slice keeps it in-process. auth.json needs a heterogeneous-entry schema change to hold non-token shapes — separate slice. oauth::builtin_client_id is the per-provider client_id resolver — env-var- required (no hard-coded default) so paste-back fails loud rather than emitting an authorize URL with a placeholder client_id. 
flow.rs sheds its module-level #![allow(dead_code)] now that init_paste_authorize has a prod caller transitively from start_paste_login (itself allow-dead-code until the next slice wires the mcp::login action). --- openab-agent/src/mcp/flow.rs | 5 - openab-agent/src/mcp/oauth.rs | 60 ++++++++ openab-agent/src/mcp/runtime.rs | 251 ++++++++++++++++++++++++++++++++ 3 files changed, 311 insertions(+), 5 deletions(-) diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index caac9d340..39ed8b13c 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -3,11 +3,6 @@ //! security-primitive change can't drift between modules. Orchestration //! (device polling, callback parsing) lands in subsequent slices. -// First prod caller (§6.4 login orchestration) lands in the next slice; -// until then every item is reachable only via tests, so -// `clippy --features mcp -D warnings` would flag dead_code. -#![allow(dead_code)] - use anyhow::Result; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use url::Url; diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index fa6cb2497..6d75d7952 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -50,6 +50,29 @@ pub fn builtin(name: &str) -> Option { BUILTINS.iter().copied().find(|spec| spec.name == name) } +/// Resolve a built-in provider's OAuth `client_id`. Mirrors +/// `auth::codex_client_id`'s env-var-override pattern but without a hard- +/// coded default — the Anthropic MCP public client_id isn't yet pinned in +/// this repo, so requiring the env var fails fast with a useful error +/// rather than silently dialing with a placeholder. Replace with a +/// hard-coded default once a real value is published. 
+pub fn builtin_client_id(provider: &str) -> Result { + let env_var = match provider { + "anthropic-mcp" => "OPENAB_MCP_ANTHROPIC_CLIENT_ID", + other => { + return Err(anyhow!( + "no built-in client_id mapping for provider {other:?}" + )); + } + }; + std::env::var(env_var).map_err(|_| { + anyhow!( + "built-in provider {provider:?} requires env var {env_var} \ + (client_id of the provider's OAuth app)" + ) + }) +} + /// Effective per-server OAuth parameters after resolving the built-in catalog /// and `OAuthConfig` overrides. /// @@ -161,6 +184,43 @@ fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result mod tests { use super::*; + // Both env-touching tests below race the same OS env var; serialize + // them per the runbook's Tick 24 lesson (acp.rs ANTHROPIC_API_KEY race). + static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + #[test] + fn builtin_client_id_requires_env_var() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // SAFETY: serialized via ENV_LOCK; isolated env key. + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + let err = builtin_client_id("anthropic-mcp") + .unwrap_err() + .to_string(); + assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); + } + + #[test] + fn builtin_client_id_uses_env_var_when_set() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // SAFETY: serialized via ENV_LOCK; isolated env key. 
+ unsafe { + std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-test-id"); + } + let id = builtin_client_id("anthropic-mcp").unwrap(); + assert_eq!(id, "anth-test-id"); + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + } + + #[test] + fn builtin_client_id_rejects_unknown_provider() { + let err = builtin_client_id("does-not-exist").unwrap_err().to_string(); + assert!(err.contains("does-not-exist"), "got: {err}"); + } + #[test] fn anthropic_mcp_spec_matches_adr_table() { let spec = builtin("anthropic-mcp").expect("anthropic-mcp is built-in"); diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 841ee8608..b4a4a6494 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -23,6 +23,8 @@ use tokio::process::Command; use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; +use super::flow::init_paste_authorize; +use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { @@ -67,11 +69,39 @@ impl std::fmt::Debug for ServerHandle { } } +/// Transient per-server state captured at `start_paste_login` and consumed +/// by `complete_login` (next slice). `token_url` + `provider_name` are +/// snapshotted up front so a config edit between the two calls can't +/// silently redirect the token exchange. +/// +/// ADR §6.4 says this lives "in TokenStore"; this slice keeps it in +/// process memory only — `auth.json` would need a heterogeneous-entry +/// schema change to hold non-token shapes, deferred to its own slice. +#[derive(Debug, Clone)] +#[allow(dead_code)] // wired in next slice (complete_login) +pub struct PendingPasteLogin { + pub verifier: String, + pub state: String, + pub token_url: String, + pub provider_name: String, +} + +/// Public return of `start_paste_login`. 
The caller relays `authorize_url` +/// to the user; `state` is echoed so the agent can show / log it without +/// reaching into runtime internals. +#[derive(Debug, Clone)] +#[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) +pub struct PasteLoginStart { + pub authorize_url: String, + pub state: String, +} + /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. #[derive(Debug, Default, Clone)] pub struct McpRuntimeManager { handles: Arc>>, + pending_logins: Arc>>, } impl McpRuntimeManager { @@ -91,6 +121,7 @@ impl McpRuntimeManager { .collect(); Self { handles: Arc::new(RwLock::new(handles)), + pending_logins: Arc::new(RwLock::new(HashMap::new())), } } @@ -147,6 +178,86 @@ impl McpRuntimeManager { out } + /// Begin a paste-back OAuth login for an HTTP server with an `oauth:` + /// block (ADR §6.4). Produces the authorize URL the agent surfaces to + /// the user; the matching PKCE verifier + `state` nonce are kept on + /// `self.pending_logins` for `complete_login` (next slice) to consume. + /// + /// Scoped to **built-in** providers this slice. Custom-provider + /// paste-back needs runtime port allocation for the callback (§6.4), + /// and any provider that advertises a `device_authorization_endpoint` + /// should run device-code instead (§6.4 selection logic). Both errors + /// are explicit so the LLM can pick a different action. + #[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) + pub async fn start_paste_login(&self, name: &str) -> Result { + let oauth_cfg = { + let guard = self.handles.read().await; + let handle = guard + .get(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + match handle.config.resolved(name)? { + ServerConfig::Http { + oauth: Some(oauth), .. + } => oauth, + ServerConfig::Http { oauth: None, .. 
} => { + return Err(anyhow!("mcp server {name:?} has no oauth block")); + } + ServerConfig::Stdio { .. } => { + return Err(anyhow!("mcp server {name:?} is stdio, not http+oauth")); + } + } + }; + + let provider = resolve(&oauth_cfg)?; + let (client_id, redirect_uri) = match &provider { + ResolvedProvider::Builtin { + provider_name, callback, .. + } => (builtin_client_id(provider_name)?, (*callback).to_string()), + ResolvedProvider::Custom { + device_authorization_endpoint: Some(_), .. + } => { + return Err(anyhow!( + "mcp server {name:?} has a device endpoint; use device flow" + )); + } + ResolvedProvider::Custom { .. } => { + return Err(anyhow!( + "mcp server {name:?}: custom-provider paste-back not yet supported" + )); + } + }; + + let started = init_paste_authorize(&provider, &client_id, &redirect_uri)?; + let pending = PendingPasteLogin { + verifier: started.code_verifier, + state: started.state.clone(), + token_url: provider.token_url().to_string(), + provider_name: provider_name_of(&provider), + }; + { + let mut handles = self.handles.write().await; + if let Some(handle) = handles.get_mut(name) { + handle.status = ServerStatus::NeedsAuth; + } + } + self.pending_logins + .write() + .await + .insert(name.to_string(), pending); + Ok(PasteLoginStart { + authorize_url: started.url, + state: started.state, + }) + } + + /// Borrow the in-flight pending paste-login for `name`. Returns a + /// clone so callers don't hold the lock; `complete_login` (next + /// slice) is the intended consumer. + #[allow(dead_code)] // first prod caller is complete_login in next slice + pub async fn pending_paste_login(&self, name: &str) -> Option { + self.pending_logins.read().await.get(name).cloned() + } + /// Lazy-connect the named server (ADR §5.7). Idempotent if already /// `Connected` with a live client. 
HTTP servers with an `oauth:` block /// are routed through `mcp login` first — `connect` marks them @@ -211,6 +322,15 @@ impl McpRuntimeManager { } } +/// Stringified provider name for the pending-state record. `Builtin` keeps +/// its `&'static str` static; `Custom` already owns a `String`. +fn provider_name_of(provider: &ResolvedProvider) -> String { + match provider { + ResolvedProvider::Builtin { provider_name, .. } => (*provider_name).to_string(), + ResolvedProvider::Custom { provider_name, .. } => provider_name.clone(), + } +} + /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. @@ -402,6 +522,137 @@ mod tests { } } + // start_paste_login + builtin_client_id race on the same env var. + // Same fix as oauth.rs / acp.rs (Tick 24 lesson). + static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + fn linear_custom_cfg() -> &'static str { + r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { + "provider": "linear", + "authorize_url": "https://linear.app/oauth/authorize", + "token_url": "https://api.linear.app/oauth/token", + "client_id": "linear-client", + "scopes": ["read"] + } + } + } + }"# + } + + fn anthropic_builtin_cfg() -> &'static str { + r#"{ + "mcpServers": { + "anthro": { + "type": "http", + "url": "https://example.com/mcp", + "oauth": { "provider": "anthropic-mcp" } + } + } + }"# + } + + async fn start_login_err(mgr: &McpRuntimeManager, name: &str) -> String { + mgr.start_paste_login(name) + .await + .unwrap_err() + .to_string() + } + + #[tokio::test] + async fn start_paste_login_builtin_returns_authorize_url_and_pins_pending() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // SAFETY: serialized via ENV_LOCK; isolated env key. 
+ unsafe { + std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-cid"); + } + let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let start = mgr.start_paste_login("anthro").await.unwrap(); + assert!(start.authorize_url.starts_with("https://claude.ai/oauth/authorize?")); + assert!(start.authorize_url.contains("client_id=anth-cid")); + assert!(start.authorize_url.contains(&format!("state={}", start.state))); + let pending = mgr.pending_paste_login("anthro").await.unwrap(); + assert_eq!(pending.state, start.state); + assert!(!pending.verifier.is_empty()); + assert_eq!( + pending.token_url, + "https://platform.claude.com/v1/oauth/token" + ); + assert_eq!(pending.provider_name, "anthropic-mcp"); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + } + + #[tokio::test] + async fn start_paste_login_rejects_custom_provider_for_now() { + let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "linear").await; + assert!(err.contains("custom-provider"), "got: {err}"); + assert!(mgr.pending_paste_login("linear").await.is_none()); + } + + #[tokio::test] + async fn start_paste_login_rejects_custom_with_device_endpoint() { + let json = r#"{ + "mcpServers": { + "dev": { + "type": "http", + "url": "https://example.com/mcp", + "oauth": { + "provider": "dev", + "authorize_url": "https://example.com/oauth/authorize", + "token_url": "https://example.com/oauth/token", + "device_authorization_endpoint": "https://example.com/oauth/device" + } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "dev").await; + assert!(err.contains("device flow"), "got: {err}"); + } + + #[tokio::test] + async fn 
start_paste_login_rejects_stdio_server() { + let json = r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "fs").await; + assert!(err.contains("stdio"), "got: {err}"); + } + + #[tokio::test] + async fn start_paste_login_unknown_server_errors() { + let mgr = McpRuntimeManager::from_config(McpConfig::default()); + let err = start_login_err(&mgr, "ghost").await; + assert!(err.contains("ghost"), "got: {err}"); + } + + #[tokio::test] + async fn start_paste_login_builtin_without_env_var_errors_loud() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "anthro").await; + assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); + } + #[tokio::test] async fn connect_to_missing_binary_records_failed() { let json = r#"{ From 3d7db64957e0f47dc53c10679c84ea5a0df39549 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:51:23 +0000 Subject: [PATCH 66/98] style(openab-agent/mcp): satisfy rustfmt for Tick 39 slice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three distinct fmt rule misses on the start_paste_login slice: - oauth.rs: chain at receiver+chain = exactly 60 chars stays inline (over-broken on the unwrap_err+to_string chain) - runtime.rs: struct-pattern binders force per-line when arm body is also long enough that the inline form would overflow — different threshold than the existing connect()'s Stdio arm - runtime.rs: field-access chain ELEMENT counts toward chain_width; `start.authorize_url.starts_with(...)` is 2 chain elements, not 1, so receiver+chain over 60 → 
break --- openab-agent/src/mcp/oauth.rs | 4 +--- openab-agent/src/mcp/runtime.rs | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index 6d75d7952..f3ea31661 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -195,9 +195,7 @@ mod tests { unsafe { std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); } - let err = builtin_client_id("anthropic-mcp") - .unwrap_err() - .to_string(); + let err = builtin_client_id("anthropic-mcp").unwrap_err().to_string(); assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index b4a4a6494..8271b5b95 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -211,10 +211,13 @@ impl McpRuntimeManager { let provider = resolve(&oauth_cfg)?; let (client_id, redirect_uri) = match &provider { ResolvedProvider::Builtin { - provider_name, callback, .. + provider_name, + callback, + .. } => (builtin_client_id(provider_name)?, (*callback).to_string()), ResolvedProvider::Custom { - device_authorization_endpoint: Some(_), .. + device_authorization_endpoint: Some(_), + .. 
} => { return Err(anyhow!( "mcp server {name:?} has a device endpoint; use device flow" @@ -557,10 +560,7 @@ mod tests { } async fn start_login_err(mgr: &McpRuntimeManager, name: &str) -> String { - mgr.start_paste_login(name) - .await - .unwrap_err() - .to_string() + mgr.start_paste_login(name).await.unwrap_err().to_string() } #[tokio::test] @@ -573,9 +573,13 @@ mod tests { let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let start = mgr.start_paste_login("anthro").await.unwrap(); - assert!(start.authorize_url.starts_with("https://claude.ai/oauth/authorize?")); + assert!(start + .authorize_url + .starts_with("https://claude.ai/oauth/authorize?")); assert!(start.authorize_url.contains("client_id=anth-cid")); - assert!(start.authorize_url.contains(&format!("state={}", start.state))); + assert!(start + .authorize_url + .contains(&format!("state={}", start.state))); let pending = mgr.pending_paste_login("anthro").await.unwrap(); assert_eq!(pending.state, start.state); assert!(!pending.verifier.is_empty()); From e90fcbd1127d87ffc4c4640990e9896779638c1b Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:11:12 +0000 Subject: [PATCH 67/98] feat(openab-agent): split auth.json into TokenStore | PendingPasteLogin Untagged Serde enum `AuthEntry` keeps refresh-task state machine separate from in-flight paste-login state. Per Mira's Tick 39 review: repurposing TokenStore fields for pending entries would have made refresh loop on them. Adds `{load,save,remove}_pending_login` helpers (mcp-gated, wired in next slice via runtime::start_paste_login). 
--- openab-agent/src/auth.rs | 203 +++++++++++++++++++++++++++++++++++---- 1 file changed, 183 insertions(+), 20 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index ff289b298..0bad96e7e 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -39,6 +39,38 @@ pub struct TokenStore { pub provider: String, } +/// Transient per-server state captured at `start_paste_login` and consumed +/// by `complete_login` (ADR §6.4). Lives in `auth.json` under +/// `mcp-pending:`. `token_url` + `provider_name` are snapshotted +/// up front so a config edit between init and finish can't redirect the +/// token exchange. +/// +/// Unconditionally compiled (not behind `mcp` feature) so a non-mcp build +/// can still parse + round-trip an `auth.json` containing pending entries. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PendingPasteLogin { + pub verifier: String, + pub state: String, + pub token_url: String, + pub provider_name: String, +} + +/// `auth.json` value type. Untagged Serde enum: `TokenStore` has required +/// `access_token`, `PendingPasteLogin` has required `verifier` — the +/// shapes are disjoint, so deserialization picks the right variant +/// without an explicit tag (and existing files stay byte-compatible). +/// +/// Per Mira's Tick 39 review: option-A (repurposing TokenStore fields for +/// pending state) would have made the refresh task treat pending entries +/// as "expired tokens" and loop on them. The untagged enum keeps the two +/// state machines completely separate. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum AuthEntry { + Token(TokenStore), + Pending(PendingPasteLogin), +} + fn auth_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home) @@ -54,7 +86,7 @@ fn auth_path() -> PathBuf { /// Discriminates by the top-level `access_token` key — present means the /// file is the legacy `TokenStore` shape, absent means the new namespaced /// map. A single JSON parse gives accurate error context either way. -fn read_auth_file(path: &Path) -> Result<HashMap<String, TokenStore>> { +fn read_auth_file(path: &Path) -> Result<HashMap<String, AuthEntry>> { let data = std::fs::read_to_string(path)?; let value: serde_json::Value = serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}"))?; @@ -62,7 +94,7 @@ fn read_auth_file(path: &Path) -> Result<HashMap<String, TokenStore>> { let legacy: TokenStore = serde_json::from_value(value) .map_err(|e| anyhow!("Invalid auth.json (legacy format): {e}"))?; let mut map = HashMap::new(); - map.insert(CODEX_NAMESPACE.to_string(), legacy); + map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(legacy)); return Ok(map); } serde_json::from_value(value).map_err(|e| anyhow!("Invalid auth.json: {e}")) @@ -72,7 +104,7 @@ fn read_auth_file(path: &Path) -> Result<HashMap<String, TokenStore>> { /// satisfies the ADR §6.1 refresh-token rotation contract — without it, a /// Spot interruption between local write and S3 sync would restore a /// revoked refresh token from durable storage on the next task start. -fn write_auth_file(path: &Path, map: &HashMap<String, TokenStore>) -> Result<()> { +fn write_auth_file(path: &Path, map: &HashMap<String, AuthEntry>) -> Result<()> { if let Some(dir) = path.parent() { std::fs::create_dir_all(dir)?; } @@ -106,18 +138,19 @@ pub fn load_tokens() -> Result<TokenStore> { path.display() ) })?; - map.get(CODEX_NAMESPACE).cloned().ok_or_else(|| { - anyhow!( + match map.get(CODEX_NAMESPACE) { + Some(AuthEntry::Token(t)) => Ok(t.clone()), + _ => Err(anyhow!( "No codex credentials in {}. 
Run `openab-agent auth codex-oauth` first.", path.display() - ) - }) + )), + } } fn save_tokens(store: &TokenStore) -> Result<()> { let path = auth_path(); let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(CODEX_NAMESPACE.to_string(), store.clone()); + map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(store.clone())); write_auth_file(&path, &map) } @@ -130,9 +163,11 @@ pub fn load_namespaced_token(key: &str) -> Result { let path = auth_path(); let map = read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; - map.get(key) - .cloned() - .ok_or_else(|| anyhow!("no credentials stored for {key:?}")) + match map.get(key) { + Some(AuthEntry::Token(t)) => Ok(t.clone()), + Some(AuthEntry::Pending(_)) => Err(anyhow!("{key:?} is a pending login, not a token")), + None => Err(anyhow!("no credentials stored for {key:?}")), + } } /// Insert or replace the credential at `key`, preserving all other entries. @@ -143,7 +178,54 @@ pub fn load_namespaced_token(key: &str) -> Result { pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { let path = auth_path(); let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(key.to_string(), store.clone()); + map.insert(key.to_string(), AuthEntry::Token(store.clone())); + write_auth_file(&path, &map) +} + +/// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). Errors +/// if the key holds a token instead — the two namespaces shouldn't +/// collide, but a hand-edited file would. 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) +pub fn load_pending_login(key: &str) -> Result { + let path = auth_path(); + let map = + read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + match map.get(key) { + Some(AuthEntry::Pending(p)) => Ok(p.clone()), + Some(AuthEntry::Token(_)) => Err(anyhow!("{key:?} is a token, not a pending login")), + None => Err(anyhow!("no pending login for {key:?}")), + } +} + +/// Persist a `PendingPasteLogin` under `mcp-pending:` (ADR §6.4). +/// Read-modify-write — same serialization caveat as `save_namespaced_token`. +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) +pub fn save_pending_login(key: &str, val: &PendingPasteLogin) -> Result<()> { + let path = auth_path(); + let mut map = read_auth_file(&path).unwrap_or_default(); + map.insert(key.to_string(), AuthEntry::Pending(val.clone())); + write_auth_file(&path, &map) +} + +/// Remove a pending-login entry (consumed on successful `complete_login`, +/// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (complete_login) +pub fn remove_pending_login(key: &str) -> Result<()> { + let path = auth_path(); + let mut map = match read_auth_file(&path) { + Ok(m) => m, + Err(_) => return Ok(()), + }; + if map.remove(key).is_none() { + return Ok(()); + } + if map.is_empty() { + let _ = std::fs::remove_file(&path); + return Ok(()); + } write_auth_file(&path, &map) } @@ -626,6 +708,13 @@ mod tests { assert_eq!(challenge, expected); } + fn token_of(entry: Option<&AuthEntry>) -> &TokenStore { + match entry { + Some(AuthEntry::Token(t)) => t, + other => panic!("expected Token, got {other:?}"), + } + } + #[test] fn read_auth_file_migrates_legacy_single_tenant_format() { let dir = tempfile::tempdir().unwrap(); @@ -635,7 +724,7 @@ mod tests { let map = read_auth_file(&path).unwrap(); assert_eq!(map.len(), 1); assert_eq!( - map.get(CODEX_NAMESPACE).unwrap().access_token, + token_of(map.get(CODEX_NAMESPACE)).access_token, "test_access_token_value" ); } @@ -645,13 +734,13 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("auth.json"); let mut input = HashMap::new(); - input.insert("codex".to_string(), make_store(1)); - input.insert("mcp:linear".to_string(), make_store(2)); + input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); + input.insert("mcp:linear".to_string(), AuthEntry::Token(make_store(2))); write_auth_file(&path, &input).unwrap(); let map = read_auth_file(&path).unwrap(); assert_eq!(map.len(), 2); - assert_eq!(map.get("codex").unwrap().expires_at, 1); - assert_eq!(map.get("mcp:linear").unwrap().expires_at, 2); + assert_eq!(token_of(map.get("codex")).expires_at, 1); + assert_eq!(token_of(map.get("mcp:linear")).expires_at, 2); } #[test] @@ -659,12 +748,12 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("auth.json"); let mut input = HashMap::new(); - input.insert("mcp:github".to_string(), make_store(42)); + 
input.insert("mcp:github".to_string(), AuthEntry::Token(make_store(42))); write_auth_file(&path, &input).unwrap(); let raw = std::fs::read_to_string(&path).unwrap(); assert!(raw.contains("mcp:github")); let map = read_auth_file(&path).unwrap(); - assert_eq!(map.get("mcp:github").unwrap().expires_at, 42); + assert_eq!(token_of(map.get("mcp:github")).expires_at, 42); } #[cfg(unix)] @@ -674,9 +763,83 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("auth.json"); let mut input = HashMap::new(); - input.insert("codex".to_string(), make_store(0)); + input.insert("codex".to_string(), AuthEntry::Token(make_store(0))); write_auth_file(&path, &input).unwrap(); let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777; assert_eq!(mode, 0o600, "expected 0600, got {mode:o}"); } + + fn make_pending() -> PendingPasteLogin { + PendingPasteLogin { + verifier: "test-verifier".to_string(), + state: "test-state".to_string(), + token_url: "https://example.com/token".to_string(), + provider_name: "anthropic-mcp".to_string(), + } + } + + #[test] + fn auth_entry_untagged_round_trip_mixed_shapes() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); + input.insert( + "mcp-pending:linear".to_string(), + AuthEntry::Pending(make_pending()), + ); + write_auth_file(&path, &input).unwrap(); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.len(), 2); + assert_eq!(token_of(map.get("codex")).expires_at, 1); + match map.get("mcp-pending:linear") { + Some(AuthEntry::Pending(p)) => assert_eq!(p.verifier, "test-verifier"), + other => panic!("expected Pending, got {other:?}"), + } + } + + #[cfg(feature = "mcp")] + #[test] + fn pending_login_helpers_round_trip_via_global_path() { + // Drive the disk-backed save/load/remove path end-to-end. 
Touches + // the real `auth_path()` (env HOME) so isolate via a tempdir HOME. + // Single test = no need for an ENV_LOCK mutex. + let dir = tempfile::tempdir().unwrap(); + let prior_home = std::env::var("HOME").ok(); + // SAFETY: single-threaded, restored at end. + unsafe { + std::env::set_var("HOME", dir.path()); + } + let key = "mcp-pending:test-srv"; + save_pending_login(key, &make_pending()).unwrap(); + let got = load_pending_login(key).unwrap(); + assert_eq!(got, make_pending()); + remove_pending_login(key).unwrap(); + assert!(load_pending_login(key).is_err()); + unsafe { + match prior_home { + Some(h) => std::env::set_var("HOME", h), + None => std::env::remove_var("HOME"), + } + } + } + + #[cfg(feature = "mcp")] + #[test] + fn load_namespaced_token_errors_on_pending_entry() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert( + "mcp-pending:srv".to_string(), + AuthEntry::Pending(make_pending()), + ); + write_auth_file(&path, &input).unwrap(); + let map = read_auth_file(&path).unwrap(); + // Directly assert the discriminant rather than calling + // `load_namespaced_token`, which would also touch HOME and race + // the pending-helpers test above. Same intent, smaller blast radius. + let pending = map.get("mcp-pending:srv"); + assert!(matches!(pending, Some(AuthEntry::Pending(_)))); + } } From 5372bf30c9e0ed1a13b583f5ae315843f04e2cb3 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:32:34 +0000 Subject: [PATCH 68/98] feat(openab-agent/mcp): persist pending paste-login to auth.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `start_paste_login` now writes the `mcp-pending:<name>` entry via `auth::save_pending_login`, dropping the in-memory `pending_logins` map. Aligns runtime state with the ADR §6.4 contract ("kept in TokenStore") so `complete_login` survives an agent restart. 
To keep tests off the real `$HOME/.openab/agent/auth.json` (the cross-module HOME-env race that bit Tick 24), the auth-path becomes an injected `PathBuf` field: `from_config()` defaults to `auth::auth_path()`, `from_config_with_auth_path()` lets tests point at a tempdir. The two tests that exercise the disk path adopt a `mgr_with_tempdir` helper; rejection tests untouched (they error before persist). `auth::{load,save,remove}_pending_login` likewise take `&Path` so they're driven by the injected path, not a global. --- openab-agent/src/auth.rs | 66 ++++++++++++++------------------- openab-agent/src/mcp/runtime.rs | 66 +++++++++++++++++---------------- 2 files changed, 62 insertions(+), 70 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 0bad96e7e..3f1716a83 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -71,7 +71,10 @@ pub enum AuthEntry { Pending(PendingPasteLogin), } -fn auth_path() -> PathBuf { +/// Default location of `auth.json`. Exposed so `McpRuntimeManager` can +/// thread the same path into its constructor and tests can inject a +/// tempdir without touching `$HOME` (which would race cross-module). +pub fn auth_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home) .join(".openab") @@ -184,13 +187,13 @@ pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { /// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). Errors /// if the key holds a token instead — the two namespaces shouldn't -/// collide, but a hand-edited file would. +/// collide, but a hand-edited file would. `path` is injected so the +/// runtime manager can point tests at a tempdir; production callers pass +/// `auth_path()`. 
#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) -pub fn load_pending_login(key: &str) -> Result { - let path = auth_path(); +pub fn load_pending_login(path: &Path, key: &str) -> Result { let map = - read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + read_auth_file(path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; match map.get(key) { Some(AuthEntry::Pending(p)) => Ok(p.clone()), Some(AuthEntry::Token(_)) => Err(anyhow!("{key:?} is a token, not a pending login")), @@ -201,21 +204,18 @@ pub fn load_pending_login(key: &str) -> Result { /// Persist a `PendingPasteLogin` under `mcp-pending:` (ADR §6.4). /// Read-modify-write — same serialization caveat as `save_namespaced_token`. #[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) -pub fn save_pending_login(key: &str, val: &PendingPasteLogin) -> Result<()> { - let path = auth_path(); - let mut map = read_auth_file(&path).unwrap_or_default(); +pub fn save_pending_login(path: &Path, key: &str, val: &PendingPasteLogin) -> Result<()> { + let mut map = read_auth_file(path).unwrap_or_default(); map.insert(key.to_string(), AuthEntry::Pending(val.clone())); - write_auth_file(&path, &map) + write_auth_file(path, &map) } /// Remove a pending-login entry (consumed on successful `complete_login`, /// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. 
#[cfg(feature = "mcp")] #[allow(dead_code)] // wired in next slice (complete_login) -pub fn remove_pending_login(key: &str) -> Result<()> { - let path = auth_path(); - let mut map = match read_auth_file(&path) { +pub fn remove_pending_login(path: &Path, key: &str) -> Result<()> { + let mut map = match read_auth_file(path) { Ok(m) => m, Err(_) => return Ok(()), }; @@ -223,10 +223,10 @@ pub fn remove_pending_login(key: &str) -> Result<()> { return Ok(()); } if map.is_empty() { - let _ = std::fs::remove_file(&path); + let _ = std::fs::remove_file(path); return Ok(()); } - write_auth_file(&path, &map) + write_auth_file(path, &map) } /// Remove the credential at `key`. Idempotent — missing key is not an @@ -800,28 +800,17 @@ mod tests { #[cfg(feature = "mcp")] #[test] - fn pending_login_helpers_round_trip_via_global_path() { - // Drive the disk-backed save/load/remove path end-to-end. Touches - // the real `auth_path()` (env HOME) so isolate via a tempdir HOME. - // Single test = no need for an ENV_LOCK mutex. + fn pending_login_helpers_round_trip_via_injected_path() { + // Tempdir path injected directly — no HOME-env shimming, so this + // test can't race auth-touching tests in other modules. let dir = tempfile::tempdir().unwrap(); - let prior_home = std::env::var("HOME").ok(); - // SAFETY: single-threaded, restored at end. 
- unsafe { - std::env::set_var("HOME", dir.path()); - } + let path = dir.path().join("auth.json"); let key = "mcp-pending:test-srv"; - save_pending_login(key, &make_pending()).unwrap(); - let got = load_pending_login(key).unwrap(); + save_pending_login(&path, key, &make_pending()).unwrap(); + let got = load_pending_login(&path, key).unwrap(); assert_eq!(got, make_pending()); - remove_pending_login(key).unwrap(); - assert!(load_pending_login(key).is_err()); - unsafe { - match prior_home { - Some(h) => std::env::set_var("HOME", h), - None => std::env::remove_var("HOME"), - } - } + remove_pending_login(&path, key).unwrap(); + assert!(load_pending_login(&path, key).is_err()); } #[cfg(feature = "mcp")] @@ -836,9 +825,10 @@ mod tests { ); write_auth_file(&path, &input).unwrap(); let map = read_auth_file(&path).unwrap(); - // Directly assert the discriminant rather than calling - // `load_namespaced_token`, which would also touch HOME and race - // the pending-helpers test above. Same intent, smaller blast radius. + // Assert the discriminant directly. `load_namespaced_token` would + // reach into the real `$HOME/.openab/agent/auth.json` and race + // cross-module tests; the variant check is the actual property + // under test. let pending = map.get("mcp-pending:srv"); assert!(matches!(pending, Some(AuthEntry::Pending(_)))); } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 8271b5b95..feb5b31fa 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -13,6 +13,7 @@ //! the duration of a child-process spawn + handshake. 
use std::collections::HashMap; +use std::path::PathBuf; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; @@ -25,6 +26,7 @@ use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; use super::flow::init_paste_authorize; use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; +use crate::auth::{auth_path, load_pending_login, save_pending_login, PendingPasteLogin}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { @@ -69,23 +71,6 @@ impl std::fmt::Debug for ServerHandle { } } -/// Transient per-server state captured at `start_paste_login` and consumed -/// by `complete_login` (next slice). `token_url` + `provider_name` are -/// snapshotted up front so a config edit between the two calls can't -/// silently redirect the token exchange. -/// -/// ADR §6.4 says this lives "in TokenStore"; this slice keeps it in -/// process memory only — `auth.json` would need a heterogeneous-entry -/// schema change to hold non-token shapes, deferred to its own slice. -#[derive(Debug, Clone)] -#[allow(dead_code)] // wired in next slice (complete_login) -pub struct PendingPasteLogin { - pub verifier: String, - pub state: String, - pub token_url: String, - pub provider_name: String, -} - /// Public return of `start_paste_login`. The caller relays `authorize_url` /// to the user; `state` is echoed so the agent can show / log it without /// reaching into runtime internals. @@ -98,14 +83,21 @@ pub struct PasteLoginStart { /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] pub struct McpRuntimeManager { handles: Arc>>, - pending_logins: Arc>>, + /// `auth.json` location used for `mcp-pending:` persistence. + /// Injectable so tests can point at a tempdir instead of `$HOME`, + /// avoiding cross-module HOME-env races (Tick 24 lesson + ADR §6.4). 
+ auth_path: PathBuf, } impl McpRuntimeManager { pub fn from_config(cfg: McpConfig) -> Self { + Self::from_config_with_auth_path(cfg, auth_path()) + } + + pub fn from_config_with_auth_path(cfg: McpConfig, auth_path: PathBuf) -> Self { let handles: HashMap<_, _> = cfg .servers .into_iter() @@ -121,7 +113,7 @@ impl McpRuntimeManager { .collect(); Self { handles: Arc::new(RwLock::new(handles)), - pending_logins: Arc::new(RwLock::new(HashMap::new())), + auth_path, } } @@ -180,8 +172,9 @@ impl McpRuntimeManager { /// Begin a paste-back OAuth login for an HTTP server with an `oauth:` /// block (ADR §6.4). Produces the authorize URL the agent surfaces to - /// the user; the matching PKCE verifier + `state` nonce are kept on - /// `self.pending_logins` for `complete_login` (next slice) to consume. + /// the user; the matching PKCE verifier + `state` nonce are persisted + /// under `mcp-pending:` in `auth.json` for `complete_login` + /// (next slice) to consume. /// /// Scoped to **built-in** providers this slice. Custom-provider /// paste-back needs runtime port allocation for the callback (§6.4), @@ -237,28 +230,26 @@ impl McpRuntimeManager { token_url: provider.token_url().to_string(), provider_name: provider_name_of(&provider), }; + save_pending_login(&self.auth_path, &pending_key(name), &pending)?; { let mut handles = self.handles.write().await; if let Some(handle) = handles.get_mut(name) { handle.status = ServerStatus::NeedsAuth; } } - self.pending_logins - .write() - .await - .insert(name.to_string(), pending); Ok(PasteLoginStart { authorize_url: started.url, state: started.state, }) } - /// Borrow the in-flight pending paste-login for `name`. Returns a - /// clone so callers don't hold the lock; `complete_login` (next - /// slice) is the intended consumer. + /// Read the on-disk pending paste-login for `name`. 
`None` if there's + /// no entry or the file is unreadable; `complete_login` (next slice) + /// is the intended consumer and will distinguish the cases via the + /// `auth::load_pending_login` error message. #[allow(dead_code)] // first prod caller is complete_login in next slice pub async fn pending_paste_login(&self, name: &str) -> Option { - self.pending_logins.read().await.get(name).cloned() + load_pending_login(&self.auth_path, &pending_key(name)).ok() } /// Lazy-connect the named server (ADR §5.7). Idempotent if already @@ -334,6 +325,11 @@ fn provider_name_of(provider: &ResolvedProvider) -> String { } } +/// `auth.json` key for an in-flight paste-login (ADR §6.4 namespace). +fn pending_key(name: &str) -> String { + format!("mcp-pending:{name}") +} + /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. @@ -563,6 +559,12 @@ mod tests { mgr.start_paste_login(name).await.unwrap_err().to_string() } + fn mgr_with_tempdir(cfg: McpConfig) -> (McpRuntimeManager, tempfile::TempDir) { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + (McpRuntimeManager::from_config_with_auth_path(cfg, path), dir) + } + #[tokio::test] async fn start_paste_login_builtin_returns_authorize_url_and_pins_pending() { let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); @@ -571,7 +573,7 @@ mod tests { std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-cid"); } let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); + let (mgr, _dir) = mgr_with_tempdir(cfg); let start = mgr.start_paste_login("anthro").await.unwrap(); assert!(start .authorize_url @@ -597,7 +599,7 @@ mod tests { #[tokio::test] async fn start_paste_login_rejects_custom_provider_for_now() { let cfg: McpConfig = 
serde_json::from_str(linear_custom_cfg()).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); + let (mgr, _dir) = mgr_with_tempdir(cfg); let err = start_login_err(&mgr, "linear").await; assert!(err.contains("custom-provider"), "got: {err}"); assert!(mgr.pending_paste_login("linear").await.is_none()); From 4e2bb1bd3e921aed80f0969281dc28d05649d02d Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:41:22 +0000 Subject: [PATCH 69/98] style(openab-agent/mcp): satisfy rustfmt for mgr_with_tempdir tuple `(McpRuntimeManager::from_config_with_auth_path(cfg, path), dir)` at 60 chars between parens trips rustfmt's tuple width heuristic and gets exploded into a 4-line literal. Bind the manager first so the tuple stays a 2-token one-liner. No behaviour change. --- openab-agent/src/mcp/runtime.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index feb5b31fa..482d46c67 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -561,8 +561,8 @@ mod tests { fn mgr_with_tempdir(cfg: McpConfig) -> (McpRuntimeManager, tempfile::TempDir) { let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - (McpRuntimeManager::from_config_with_auth_path(cfg, path), dir) + let mgr = McpRuntimeManager::from_config_with_auth_path(cfg, dir.path().join("auth.json")); + (mgr, dir) } #[tokio::test] From 16d52f6acdea5885c1eae6494e8a23dbfd77c299 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:54:00 +0000 Subject: [PATCH 70/98] feat(openab-agent/mcp): parse_paste_callback URL helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure URL → authorization-code helper for the upcoming `runtime::complete_login` (ADR §6.4). Validates the `state` echo before returning the `code` so CSRF / cross-flow contamination fails closed before any token-endpoint round-trip. 
Surfaces an `error=` query param verbatim and tolerates extra parameters (`iss`, vendor tracking) without rejecting valid callbacks. Token exchange + runtime wiring follow in the next slice; helper carries `#[allow(dead_code)]` until that lands so the no-feature build stays warning-clean. --- openab-agent/src/mcp/flow.rs | 84 ++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index 39ed8b13c..7d0fd7c80 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -1,9 +1,9 @@ //! OAuth 2.1 paste-back flow primitives (ADR §6.4). PKCE comes from //! `crate::auth::generate_pkce` — shared with the Codex paths so a -//! security-primitive change can't drift between modules. Orchestration -//! (device polling, callback parsing) lands in subsequent slices. +//! security-primitive change can't drift between modules. Device +//! polling orchestration lands in a subsequent slice. -use anyhow::Result; +use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use url::Url; @@ -57,6 +57,35 @@ pub fn init_paste_authorize( }) } +/// Parse a paste-back callback URL into its authorization `code` after +/// validating the `state` echo. OAuth 2.1 RFC 6749 §10.12 + §4.1.2 — a +/// mismatched `state` indicates CSRF / cross-flow contamination and MUST +/// reject the exchange before any token-endpoint round-trip. Tolerates +/// extra query params (vendor-specific tracking, `iss`, etc.). 
+#[allow(dead_code)] // wired in next slice (runtime::complete_login) +pub fn parse_paste_callback(redirect_url: &str, expected_state: &str) -> Result<String> { + let url = Url::parse(redirect_url).map_err(|e| anyhow!("invalid redirect URL: {e}"))?; + let mut code = None; + let mut state = None; + let mut error = None; + for (k, v) in url.query_pairs() { + match k.as_ref() { + "code" => code = Some(v.into_owned()), + "state" => state = Some(v.into_owned()), + "error" => error = Some(v.into_owned()), + _ => {} + } + } + if let Some(err) = error { + return Err(anyhow!("authorize endpoint returned error: {err}")); + } + let got_state = state.ok_or_else(|| anyhow!("callback missing state"))?; + if got_state != expected_state { + return Err(anyhow!("state mismatch; flow rejected")); + } + code.ok_or_else(|| anyhow!("callback missing code")) +} + #[cfg(test)] mod tests { use super::*; @@ -138,4 +167,53 @@ mod tests { assert!(r.url.starts_with("https://linear.app/oauth/authorize?")); assert!(r.url.contains("scope=read+write")); } + + #[test] + fn parse_paste_callback_extracts_code_when_state_matches() { + let url = "http://localhost:53692/callback?code=abc123&state=xyz"; + let code = parse_paste_callback(url, "xyz").unwrap(); + assert_eq!(code, "abc123"); + } + + #[test] + fn parse_paste_callback_tolerates_extra_query_params() { + let url = "http://localhost:53692/cb?iss=https%3A%2F%2Fauth&state=s&code=c&tracking=1"; + let code = parse_paste_callback(url, "s").unwrap(); + assert_eq!(code, "c"); + } + + #[test] + fn parse_paste_callback_rejects_state_mismatch() { + let url = "http://localhost:53692/cb?code=c&state=wrong"; + let err = parse_paste_callback(url, "want").unwrap_err().to_string(); + assert!(err.contains("state mismatch"), "got: {err}"); + } + + #[test] + fn parse_paste_callback_rejects_missing_state() { + let url = "http://localhost:53692/cb?code=c"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("missing state"), "got: 
{err}"); + } + + #[test] + fn parse_paste_callback_rejects_missing_code() { + let url = "http://localhost:53692/cb?state=x"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("missing code"), "got: {err}"); + } + + #[test] + fn parse_paste_callback_surfaces_authorize_error() { + let url = "http://localhost:53692/cb?error=access_denied&state=x"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("access_denied"), "got: {err}"); + } + + #[test] + fn parse_paste_callback_rejects_unparseable_url() { + let url = "not a url"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("invalid redirect URL"), "got: {err}"); + } } From f716a333a91245fc12baf9c4ee9fa9be514783f9 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 06:25:53 +0000 Subject: [PATCH 71/98] feat(openab-agent/mcp): complete_login finishes paste-back OAuth flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the §6.4 paste-back loop opened by start_paste_login: read PendingPasteLogin from auth.json, validate the redirect URL's state nonce (RFC 6749 §10.12), POST the auth code + PKCE verifier to the snapshotted token_url (RFC 6749 §4.1.3 / RFC 7636 §4.5), persist the resulting TokenStore under <name>, clear the pending entry, and transition NeedsAuth → Disconnected so the next connect() dials the authenticated transport. Split into complete_login (HTTP) + finish_login (pure persist) so tests cover the state-machine transition without a mock token endpoint. State-mismatch rejection leaves the pending entry intact — user can retry the same paste without re-issuing mcp login. Auth helpers gain path-injected siblings (save_namespaced_token_at, load_namespaced_token_at) matching the PendingPasteLogin helper convention. CLI surface (mcp login subcommand) is the next slice. 
Refactor extracts shared client/redirect resolution out of start_paste_login so both entry points raise the same error on a mid-flow config edit. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 23 ++- openab-agent/src/mcp/runtime.rs | 267 ++++++++++++++++++++++++++++---- 2 files changed, 254 insertions(+), 36 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 3f1716a83..6206bc087 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -163,9 +163,15 @@ fn save_tokens(store: &TokenStore) -> Result<()> { #[cfg(feature = "mcp")] #[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) pub fn load_namespaced_token(key: &str) -> Result { - let path = auth_path(); + load_namespaced_token_at(&auth_path(), key) +} + +/// Path-injected sibling of `load_namespaced_token` (Tick 42 lesson). +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp login regression test) +pub fn load_namespaced_token_at(path: &Path, key: &str) -> Result { let map = - read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + read_auth_file(path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; match map.get(key) { Some(AuthEntry::Token(t)) => Ok(t.clone()), Some(AuthEntry::Pending(_)) => Err(anyhow!("{key:?} is a pending login, not a token")), @@ -179,10 +185,17 @@ pub fn load_namespaced_token(key: &str) -> Result { #[cfg(feature = "mcp")] #[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { - let path = auth_path(); - let mut map = read_auth_file(&path).unwrap_or_default(); + save_namespaced_token_at(&auth_path(), key, store) +} + +/// Path-injected sibling of `save_namespaced_token` so tests + the runtime +/// manager can target a tempdir without `$HOME` overrides (Tick 42 lesson). 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp/oauth.rs complete_login) +pub fn save_namespaced_token_at(path: &Path, key: &str, store: &TokenStore) -> Result<()> { + let mut map = read_auth_file(path).unwrap_or_default(); map.insert(key.to_string(), AuthEntry::Token(store.clone())); - write_auth_file(&path, &map) + write_auth_file(path, &map) } /// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). Errors diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 482d46c67..43b5ef176 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -24,9 +24,12 @@ use tokio::process::Command; use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -use super::flow::init_paste_authorize; +use super::flow::{init_paste_authorize, parse_paste_callback}; use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; -use crate::auth::{auth_path, load_pending_login, save_pending_login, PendingPasteLogin}; +use crate::auth::{ + auth_path, load_pending_login, remove_pending_login, save_namespaced_token_at, + save_pending_login, PendingPasteLogin, TokenStore, +}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { @@ -183,6 +186,91 @@ impl McpRuntimeManager { /// are explicit so the LLM can pick a different action. 
#[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) pub async fn start_paste_login(&self, name: &str) -> Result { + let (provider, client_id, redirect_uri) = self.resolve_paste_client(name).await?; + let started = init_paste_authorize(&provider, &client_id, &redirect_uri)?; + let pending = PendingPasteLogin { + verifier: started.code_verifier, + state: started.state.clone(), + token_url: provider.token_url().to_string(), + provider_name: provider_name_of(&provider), + }; + save_pending_login(&self.auth_path, &pending_key(name), &pending)?; + { + let mut handles = self.handles.write().await; + if let Some(handle) = handles.get_mut(name) { + handle.status = ServerStatus::NeedsAuth; + } + } + Ok(PasteLoginStart { + authorize_url: started.url, + state: started.state, + }) + } + + /// Read the on-disk pending paste-login for `name`. `None` if there's + /// no entry or the file is unreadable. Used by `complete_login` to + /// drive flow continuation and by `mcp status` to surface a partially + /// completed login (next slice will add the status surfacing). + #[allow(dead_code)] // wired in next slice (mcp status surfacing) + pub async fn pending_paste_login(&self, name: &str) -> Option { + load_pending_login(&self.auth_path, &pending_key(name)).ok() + } + + /// Finish a paste-back OAuth flow (ADR §6.4). Reads the snapshotted + /// `PendingPasteLogin`, validates the redirect URL's `state` against + /// the snapshotted nonce (RFC 6749 §10.12), exchanges the auth code + /// at the snapshotted `token_url`, persists the resulting + /// `TokenStore` under ``, and clears the pending entry. Status + /// transitions `NeedsAuth → Disconnected` so the next `connect()` + /// dials the now-authenticated transport. 
+ #[allow(dead_code)] // wired in next slice (mcp login CLI subcommand) + pub async fn complete_login(&self, name: &str, redirect_url: &str) -> Result<()> { + let pending = load_pending_login(&self.auth_path, &pending_key(name)) + .map_err(|_| anyhow!("no pending login for {name:?}; run `mcp login {name}` first"))?; + let code = parse_paste_callback(redirect_url, &pending.state)?; + let (_provider, client_id, redirect_uri) = self.resolve_paste_client(name).await?; + let resp = post_token_exchange( + &pending.token_url, + &client_id, + &redirect_uri, + &code, + &pending.verifier, + ) + .await?; + self.finish_login(name, &pending, resp).await + } + + /// Pure-persistence tail of `complete_login`. Split out so tests can + /// drive the state-machine + on-disk transition without a real token + /// endpoint. Errors leave the pending entry intact so the user can + /// retry the same flow. + async fn finish_login( + &self, + name: &str, + pending: &PendingPasteLogin, + resp: TokenExchangeResponse, + ) -> Result<()> { + let store = TokenStore { + access_token: resp.access_token, + refresh_token: resp.refresh_token.unwrap_or_default(), + expires_at: now_secs().saturating_add(resp.expires_in.unwrap_or(0)), + token_endpoint: pending.token_url.clone(), + provider: pending.provider_name.clone(), + }; + save_namespaced_token_at(&self.auth_path, name, &store)?; + remove_pending_login(&self.auth_path, &pending_key(name))?; + let mut handles = self.handles.write().await; + if let Some(handle) = handles.get_mut(name) { + handle.status = ServerStatus::Disconnected; + } + Ok(()) + } + + /// Resolve a paste-back OAuth client `(provider, client_id, redirect_uri)` + /// from the server's config. Shared by `start_paste_login` and + /// `complete_login` so a config drift between init and finish surfaces + /// the same error from both entry points. 
+ async fn resolve_paste_client(&self, name: &str) -> Result<(ResolvedProvider, String, String)> { let oauth_cfg = { let guard = self.handles.read().await; let handle = guard @@ -200,7 +288,6 @@ impl McpRuntimeManager { } } }; - let provider = resolve(&oauth_cfg)?; let (client_id, redirect_uri) = match &provider { ResolvedProvider::Builtin { @@ -222,34 +309,7 @@ impl McpRuntimeManager { )); } }; - - let started = init_paste_authorize(&provider, &client_id, &redirect_uri)?; - let pending = PendingPasteLogin { - verifier: started.code_verifier, - state: started.state.clone(), - token_url: provider.token_url().to_string(), - provider_name: provider_name_of(&provider), - }; - save_pending_login(&self.auth_path, &pending_key(name), &pending)?; - { - let mut handles = self.handles.write().await; - if let Some(handle) = handles.get_mut(name) { - handle.status = ServerStatus::NeedsAuth; - } - } - Ok(PasteLoginStart { - authorize_url: started.url, - state: started.state, - }) - } - - /// Read the on-disk pending paste-login for `name`. `None` if there's - /// no entry or the file is unreadable; `complete_login` (next slice) - /// is the intended consumer and will distinguish the cases via the - /// `auth::load_pending_login` error message. - #[allow(dead_code)] // first prod caller is complete_login in next slice - pub async fn pending_paste_login(&self, name: &str) -> Option { - load_pending_login(&self.auth_path, &pending_key(name)).ok() + Ok((provider, client_id, redirect_uri)) } /// Lazy-connect the named server (ADR §5.7). Idempotent if already @@ -330,6 +390,63 @@ fn pending_key(name: &str) -> String { format!("mcp-pending:{name}") } +/// Wall-clock seconds since Unix epoch. Saturates at 0 if the clock is +/// pre-epoch (would only happen on a misconfigured container). +fn now_secs() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0) +} + +/// Token endpoint response (RFC 6749 §4.1.4 / §5.1). 
`refresh_token` and +/// `expires_in` are optional — some providers (xAI as of writing) omit +/// them on initial exchange. The runtime tolerates the absence and +/// records empty/zero, leaving the refresh path to bail explicitly when +/// invoked. +#[derive(Debug, serde::Deserialize)] +struct TokenExchangeResponse { + access_token: String, + #[serde(default)] + refresh_token: Option, + #[serde(default)] + expires_in: Option, +} + +/// POST the auth code to the OAuth 2.1 token endpoint per RFC 6749 +/// §4.1.3 + RFC 7636 §4.5 (PKCE verifier). Public client — no +/// `client_secret`. Errors fold body text into the message so transient +/// 4xx from the provider land in the user's terminal verbatim. +async fn post_token_exchange( + token_url: &str, + client_id: &str, + redirect_uri: &str, + code: &str, + code_verifier: &str, +) -> Result { + let client = reqwest::Client::builder() + .build() + .context("build reqwest client")?; + let resp = client + .post(token_url) + .form(&[ + ("grant_type", "authorization_code"), + ("code", code), + ("code_verifier", code_verifier), + ("client_id", client_id), + ("redirect_uri", redirect_uri), + ]) + .send() + .await + .with_context(|| format!("POST {token_url} (token exchange)"))?; + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + if !status.is_success() { + return Err(anyhow!("token endpoint returned {status}: {body}")); + } + serde_json::from_str(&body).map_err(|e| anyhow!("invalid token response: {e}; body={body}")) +} + /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. 
@@ -695,4 +812,92 @@ mod tests { assert!(!env.contains_key("DISCORD_BOT_TOKEN")); assert!(!env.contains_key("ANTHROPIC_API_KEY")); } + + fn seed_pending(mgr: &McpRuntimeManager, name: &str, state: &str) -> PendingPasteLogin { + let pending = PendingPasteLogin { + verifier: "v3rifier".to_string(), + state: state.to_string(), + token_url: "https://example.test/token".to_string(), + provider_name: "linear".to_string(), + }; + save_pending_login(&mgr.auth_path, &pending_key(name), &pending).unwrap(); + pending + } + + #[tokio::test] + async fn complete_login_rejects_when_no_pending_entry() { + let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let err = mgr + .complete_login("linear", "http://localhost/cb?code=c&state=s") + .await + .unwrap_err() + .to_string(); + assert!(err.contains("no pending login"), "expected hint in {err}"); + assert!(err.contains("mcp login"), "expected CLI hint in {err}"); + } + + #[tokio::test] + async fn complete_login_rejects_state_mismatch_and_keeps_pending() { + let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let pending = seed_pending(&mgr, "linear", "want"); + let url = "http://localhost/cb?code=c&state=other"; + let err = mgr + .complete_login("linear", url) + .await + .unwrap_err() + .to_string(); + assert!( + err.contains("state mismatch"), + "expected CSRF guard in {err}" + ); + // Pending entry must survive a rejected attempt so the user can + // re-issue the same paste without going through `mcp login` again. 
+ let got = mgr.pending_paste_login("linear").await.unwrap(); + assert_eq!(got, pending); + } + + #[tokio::test] + async fn finish_login_persists_token_clears_pending_and_unblocks_connect() { + let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let pending = seed_pending(&mgr, "linear", "s"); + // Pre-set NeedsAuth so we can observe the transition. + { + let mut h = mgr.handles.write().await; + h.get_mut("linear").unwrap().status = ServerStatus::NeedsAuth; + } + let resp = TokenExchangeResponse { + access_token: "atok".to_string(), + refresh_token: Some("rtok".to_string()), + expires_in: Some(3600), + }; + mgr.finish_login("linear", &pending, resp).await.unwrap(); + assert!(mgr.pending_paste_login("linear").await.is_none()); + let token = crate::auth::load_namespaced_token_at(&mgr.auth_path, "linear").unwrap(); + assert_eq!(token.access_token, "atok"); + assert_eq!(token.refresh_token, "rtok"); + assert_eq!(token.token_endpoint, "https://example.test/token"); + assert_eq!(token.provider, "linear"); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); + } + + #[tokio::test] + async fn finish_login_tolerates_provider_omitting_refresh_token() { + let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let pending = seed_pending(&mgr, "linear", "s"); + let resp = TokenExchangeResponse { + access_token: "atok".to_string(), + refresh_token: None, + expires_in: None, + }; + mgr.finish_login("linear", &pending, resp).await.unwrap(); + let token = crate::auth::load_namespaced_token_at(&mgr.auth_path, "linear").unwrap(); + assert_eq!(token.access_token, "atok"); + assert!(token.refresh_token.is_empty()); + // expires_at = now_secs() + 0 → effectively "already expired"; the + // refresh path bails explicitly when invoked, which is fine here. 
+ } } From 40d5ee93f3802ab1fb233f63401b8fa17ac25e2f Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 06:28:40 +0000 Subject: [PATCH 72/98] fix(openab-agent/mcp): u64::MAX sentinel for absent expires_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mira Tick 46 catch: when a provider omits both refresh_token and expires_in (Figma, Sentry, xAI), the prior `now + 0` fallback set the token "already expired", causing the next connect() to enter the refresh path, fail closed on the empty refresh_token, and bounce the user back to NeedsAuth seconds after a successful login. `expires_in: None` now uses the u64::MAX sentinel — `is_expired` returns false until the provider eventually 401s on use (at which point `mcp login` re-runs is the correct UX for non-refreshable tokens). Test now asserts the sentinel directly so the regression can't sneak back in. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 43b5ef176..1cac50e93 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -250,10 +250,24 @@ impl McpRuntimeManager { pending: &PendingPasteLogin, resp: TokenExchangeResponse, ) -> Result<()> { + // `expires_in: None` means the provider didn't advertise a + // lifetime (Figma, Sentry, xAI as of writing). Falling back to + // `now + 0` (Mira's Tick 46 catch) would set the token "already + // expired", triggering an immediate refresh on the next + // connect() — which fails closed if refresh_token is also None, + // bouncing the user back to NeedsAuth seconds after a successful + // login. 
Treat absent `expires_in` as a long-lived token via the + // u64::MAX sentinel: `is_expired` will return false until the + // provider eventually 401s on use (at which point the user runs + // `mcp login` again, the correct UX for non-refreshable tokens). + let expires_at = match resp.expires_in { + Some(secs) => now_secs().saturating_add(secs), + None => u64::MAX, + }; let store = TokenStore { access_token: resp.access_token, refresh_token: resp.refresh_token.unwrap_or_default(), - expires_at: now_secs().saturating_add(resp.expires_in.unwrap_or(0)), + expires_at, token_endpoint: pending.token_url.clone(), provider: pending.provider_name.clone(), }; @@ -897,7 +911,9 @@ mod tests { let token = crate::auth::load_namespaced_token_at(&mgr.auth_path, "linear").unwrap(); assert_eq!(token.access_token, "atok"); assert!(token.refresh_token.is_empty()); - // expires_at = now_secs() + 0 → effectively "already expired"; the - // refresh path bails explicitly when invoked, which is fine here. + // Long-lived sentinel: no `expires_in` from the provider must NOT + // cause an immediate-expiry / refresh-loop / NeedsAuth bounce on + // first use (Mira Tick 46 catch). + assert_eq!(token.expires_at, u64::MAX); } } From 119edf44a91b9a399ee9313bc9ea4256e5813890 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 06:33:04 +0000 Subject: [PATCH 73/98] =?UTF-8?q?feat(openab-agent/mcp):=20mcp=20login=20C?= =?UTF-8?q?LI=20wires=20=C2=A76.4=20paste-back=20end-to-end?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `openab-agent mcp login [--paste URL]` drives the whole paste-back loop: start_paste_login pins PKCE state, the CLI shows the authorize URL, blocks on stdin (or --paste for scripted use) for the redirect URL, then complete_login validates state + exchanges the code + persists the TokenStore. --paste exists for CI smoke tests and scripted setups; without it the CLI is interactive (read_redirect_from_stdin). 
State-mismatch or network failure leaves the pending entry intact so the user can retry with a fresh paste without re-issuing `mcp login`. Drops the #[allow(dead_code)] markers from start_paste_login, complete_login, save_namespaced_token_at, remove_pending_login and PasteLoginStart — all reachable from main via cli_login now, so dead_code propagates through the call graph (skill rule). Phase 2 user-facing surface is now complete: list / status / connect / login. mcp status surfacing of NeedsAuth + the device-code flow remain for the next slices. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 2 -- openab-agent/src/main.rs | 12 +++++++ openab-agent/src/mcp/mod.rs | 62 +++++++++++++++++++++++++++++++++ openab-agent/src/mcp/runtime.rs | 3 -- 4 files changed, 74 insertions(+), 5 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 6206bc087..c937862aa 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -191,7 +191,6 @@ pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { /// Path-injected sibling of `save_namespaced_token` so tests + the runtime /// manager can target a tempdir without `$HOME` overrides (Tick 42 lesson). #[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp/oauth.rs complete_login) pub fn save_namespaced_token_at(path: &Path, key: &str, store: &TokenStore) -> Result<()> { let mut map = read_auth_file(path).unwrap_or_default(); map.insert(key.to_string(), AuthEntry::Token(store.clone())); @@ -226,7 +225,6 @@ pub fn save_pending_login(path: &Path, key: &str, val: &PendingPasteLogin) -> Re /// Remove a pending-login entry (consumed on successful `complete_login`, /// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. 
#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (complete_login) pub fn remove_pending_login(path: &Path, key: &str) -> Result<()> { let mut map = match read_auth_file(path) { Ok(m) => m, diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index 7acf6769b..6e73338f2 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -50,6 +50,17 @@ enum McpAction { /// Server name as configured in mcp.json name: String, }, + /// Authenticate with an MCP server's OAuth provider (paste-back flow, + /// ADR §6.4). Prints the authorize URL, then reads the post-redirect + /// URL from stdin (or `--paste` for non-interactive use). + Login { + /// Server name as configured in mcp.json + name: String, + /// Pre-fill the redirect URL (skip the stdin prompt). Useful for + /// scripted setups and CI smoke tests. + #[arg(long, value_name = "URL")] + paste: Option, + }, } #[derive(Subcommand)] @@ -103,6 +114,7 @@ async fn main() { McpAction::List { resolve } => mcp::cli_list_servers(resolve), McpAction::Status => mcp::cli_show_status().await, McpAction::Connect { name } => mcp::cli_connect(name).await, + McpAction::Login { name, paste } => mcp::cli_login(name, paste).await, }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 81278aa4e..44837160f 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -154,3 +154,65 @@ pub async fn cli_connect(name: String) { } } } + +/// `openab-agent mcp login [--paste URL]`. Drives the §6.4 +/// paste-back flow end-to-end: +/// +/// 1. `start_paste_login` builds the authorize URL + pins PKCE state to +/// `auth.json` under `mcp-pending:` +/// 2. The CLI prints the URL for the user to open in a browser, then +/// blocks on stdin waiting for the redirect URL to be pasted back +/// (or skips the prompt when `--paste` was supplied) +/// 3. 
`complete_login` validates the `state` nonce, exchanges the auth +/// code, persists the resulting `TokenStore`, and clears the pending +/// entry — leaving the server `Disconnected` and ready for `connect` +/// +/// Errors at any step exit non-zero; the pending entry is preserved on +/// state-mismatch / network failure so the user can retry with a fresh +/// paste of the same redirect URL without re-running this command. +pub async fn cli_login(name: String, paste: Option) { + let manager = McpRuntimeManager::from_config(load_config_or_exit()); + let start = match manager.start_paste_login(&name).await { + Ok(s) => s, + Err(e) => { + eprintln!("✗ {name}: {e:#}"); + std::process::exit(1); + } + }; + println!("Open this URL in a browser to authorize:"); + println!(); + println!(" {}", start.authorize_url); + println!(); + println!("State nonce (pinned): {}", start.state); + println!(); + let redirect = match paste { + Some(u) => u, + None => match read_redirect_from_stdin() { + Ok(u) => u, + Err(e) => { + eprintln!("✗ failed to read redirect URL: {e}"); + std::process::exit(1); + } + }, + }; + if redirect.is_empty() { + eprintln!("✗ empty redirect URL — aborting"); + std::process::exit(1); + } + match manager.complete_login(&name, &redirect).await { + Ok(()) => println!("● logged in: {name}"), + Err(e) => { + eprintln!("✗ login failed: {e:#}"); + std::process::exit(1); + } + } +} + +fn read_redirect_from_stdin() -> std::io::Result { + use std::io::Write; + print!("Paste the FULL redirect URL: "); + std::io::stdout().flush()?; + let mut line = String::new(); + std::io::stdin().read_line(&mut line)?; + Ok(line.trim().to_string()) +} diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 1cac50e93..613c84483 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -78,7 +78,6 @@ impl std::fmt::Debug for ServerHandle { /// to the user; `state` is echoed so the agent can show / log it without /// reaching into 
runtime internals. #[derive(Debug, Clone)] -#[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) pub struct PasteLoginStart { pub authorize_url: String, pub state: String, @@ -184,7 +183,6 @@ impl McpRuntimeManager { /// and any provider that advertises a `device_authorization_endpoint` /// should run device-code instead (§6.4 selection logic). Both errors /// are explicit so the LLM can pick a different action. - #[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) pub async fn start_paste_login(&self, name: &str) -> Result { let (provider, client_id, redirect_uri) = self.resolve_paste_client(name).await?; let started = init_paste_authorize(&provider, &client_id, &redirect_uri)?; @@ -223,7 +221,6 @@ impl McpRuntimeManager { /// `TokenStore` under ``, and clears the pending entry. Status /// transitions `NeedsAuth → Disconnected` so the next `connect()` /// dials the now-authenticated transport. - #[allow(dead_code)] // wired in next slice (mcp login CLI subcommand) pub async fn complete_login(&self, name: &str, redirect_url: &str) -> Result<()> { let pending = load_pending_login(&self.auth_path, &pending_key(name)) .map_err(|_| anyhow!("no pending login for {name:?}; run `mcp login {name}` first"))?; From 744a9d14367067cd646dcd3d280a86eaa05f543e Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 06:37:35 +0000 Subject: [PATCH 74/98] docs(openab-agent/mcp): advertise stdin pipe as defense-in-depth over --paste MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mira's PR #962 review (Q2) noted that --paste echoes the redirect URL into shell history and `ps`. PKCE makes either route theoretically safe, but `echo "<url>" | openab-agent mcp login <name>` leaves no trace — preferred for CI / scripts.
--- openab-agent/src/main.rs | 12 +++++++++--- openab-agent/src/mcp/mod.rs | 5 +++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index 6e73338f2..018d55424 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -52,12 +52,18 @@ enum McpAction { }, /// Authenticate with an MCP server's OAuth provider (paste-back flow, /// ADR §6.4). Prints the authorize URL, then reads the post-redirect - /// URL from stdin (or `--paste` for non-interactive use). + /// URL from stdin. + /// + /// For non-interactive use, prefer piping the URL via stdin + /// (`echo "" | openab-agent mcp login `) over `--paste` — + /// pipes leave no trace in shell history or `ps` output. PKCE makes + /// either route safe in theory; the pipe form is defense-in-depth. Login { /// Server name as configured in mcp.json name: String, - /// Pre-fill the redirect URL (skip the stdin prompt). Useful for - /// scripted setups and CI smoke tests. + /// Pre-fill the redirect URL (skip the stdin prompt). Convenient + /// for ad-hoc testing; CI / scripts should prefer the stdin pipe + /// form to keep `code` + `state` out of shell history and `ps`. #[arg(long, value_name = "URL")] paste: Option, }, diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 44837160f..5e8574cb2 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -170,6 +170,11 @@ pub async fn cli_connect(name: String) { /// Errors at any step exit non-zero; the pending entry is preserved on /// state-mismatch / network failure so the user can retry with a fresh /// paste of the same redirect URL without re-running this command. +/// +/// Security note: for non-interactive use, prefer piping the redirect +/// URL via stdin (`echo "" | openab-agent mcp login `) over +/// `--paste`. PKCE makes either route safe in theory, but pipes leave +/// no trace in shell history or `ps` output — defense-in-depth. 
pub async fn cli_login(name: String, paste: Option) { let manager = McpRuntimeManager::from_config(load_config_or_exit()); let start = match manager.start_paste_login(&name).await { From 338aab9c85efa8f3ae4eebf07cea777eb51ddac7 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 06:56:31 +0000 Subject: [PATCH 75/98] feat(openab-agent/mcp): mcp status surfaces in-flight paste-back logins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `auth::list_pending_logins_at` enumerates `mcp-pending:` entries; `McpRuntimeManager::pending_logins` delegates via injected auth_path. `cli_show_status` cross-references against configured servers to flag both "login pending — finish via mcp login " and orphaned entries with no matching mcp.json server. Introduces `PENDING_PREFIX` const so the read/write sides can't drift on the literal. --- openab-agent/src/auth.rs | 55 +++++++++++++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 34 ++++++++++++++++++-- openab-agent/src/mcp/runtime.rs | 31 +++++++++++++++++-- 3 files changed, 114 insertions(+), 6 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index c937862aa..258d22fc4 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -197,6 +197,32 @@ pub fn save_namespaced_token_at(path: &Path, key: &str, store: &TokenStore) -> R write_auth_file(path, &map) } +/// `auth.json` namespace prefix for in-flight paste-back logins (ADR §6.4). +/// Pinned as a constant so `pending_key` (write side) and +/// `list_pending_logins_at` (read side) can't drift on the string literal. +#[cfg(feature = "mcp")] +pub const PENDING_PREFIX: &str = "mcp-pending:"; + +/// Enumerate the server names of all in-flight `mcp-pending:` entries +/// — surfaces partially completed paste-back logins to `mcp status`. Returns +/// sorted names with the prefix stripped. 
Missing / unreadable `auth.json` +/// → empty Vec; this is a best-effort status view, not a load-bearing path. +#[cfg(feature = "mcp")] +pub fn list_pending_logins_at(path: &Path) -> Vec { + let Ok(map) = read_auth_file(path) else { + return Vec::new(); + }; + let mut names: Vec = map + .iter() + .filter_map(|(k, v)| match v { + AuthEntry::Pending(_) => k.strip_prefix(PENDING_PREFIX).map(str::to_string), + AuthEntry::Token(_) => None, + }) + .collect(); + names.sort(); + names +} + /// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). Errors /// if the key holds a token instead — the two namespaces shouldn't /// collide, but a hand-edited file would. `path` is injected so the @@ -824,6 +850,35 @@ mod tests { assert!(load_pending_login(&path, key).is_err()); } + #[cfg(feature = "mcp")] + #[test] + fn list_pending_logins_strips_prefix_sorts_and_skips_tokens() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), AuthEntry::Token(make_store(0))); + input.insert( + "mcp-pending:zed-mcp".to_string(), + AuthEntry::Pending(make_pending()), + ); + input.insert( + "mcp-pending:linear".to_string(), + AuthEntry::Pending(make_pending()), + ); + input.insert("mcp:linear".to_string(), AuthEntry::Token(make_store(1))); + write_auth_file(&path, &input).unwrap(); + let names = list_pending_logins_at(&path); + assert_eq!(names, vec!["linear".to_string(), "zed-mcp".to_string()]); + } + + #[cfg(feature = "mcp")] + #[test] + fn list_pending_logins_returns_empty_on_missing_file() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("missing.json"); + assert!(list_pending_logins_at(&path).is_empty()); + } + #[cfg(feature = "mcp")] #[test] fn load_namespaced_token_errors_on_pending_entry() { diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 5e8574cb2..f6f684e51 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs 
@@ -128,15 +128,43 @@ fn print_json(status: &str, name: &str, value: &T) { /// /// Prints per-server runtime status. Servers start `Disconnected` and only /// advance after `mcp connect ` (or, later, lazy dial from the agent -/// path). +/// path). Servers with an in-flight `mcp-pending:` entry get a +/// `(login pending — run mcp login )` suffix so the user knows the +/// flow stalled mid-paste-back. Orphaned pending entries (no matching +/// config) get listed under a separator so they're visible for cleanup. pub async fn cli_show_status() { let manager = McpRuntimeManager::from_config(load_config_or_exit()); if manager.is_empty().await { println!("No MCP servers configured."); return; } - for (name, status) in manager.statuses().await { - println!("{} {name}", status.icon()); + let statuses = manager.statuses().await; + let pending: std::collections::HashSet = manager.pending_logins().into_iter().collect(); + for (name, status) in &statuses { + let mut line = format!("{} {name}", status.icon()); + if pending.contains(name) { + line.push_str(&format!( + " (login pending — run `mcp login {name}` to finish)" + )); + } else if matches!(status, runtime::ServerStatus::NeedsAuth) { + line.push_str(&format!(" (run `mcp login {name}`)")); + } + println!("{line}"); + } + let configured: std::collections::HashSet<&str> = + statuses.iter().map(|(n, _)| n.as_str()).collect(); + let orphans: Vec<&String> = pending + .iter() + .filter(|n| !configured.contains(n.as_str())) + .collect(); + if !orphans.is_empty() { + println!(); + println!("Orphaned pending logins (no matching server in mcp.json):"); + let mut sorted = orphans; + sorted.sort(); + for name in sorted { + println!(" ⏳ {name}"); + } } } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 613c84483..4c1eb4131 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -27,8 +27,8 @@ use super::config::{McpConfig, ServerConfig}; use 
super::flow::{init_paste_authorize, parse_paste_callback}; use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; use crate::auth::{ - auth_path, load_pending_login, remove_pending_login, save_namespaced_token_at, - save_pending_login, PendingPasteLogin, TokenStore, + auth_path, list_pending_logins_at, load_pending_login, remove_pending_login, + save_namespaced_token_at, save_pending_login, PendingPasteLogin, TokenStore, PENDING_PREFIX, }; #[derive(Debug, Clone, PartialEq, Eq)] @@ -137,6 +137,17 @@ impl McpRuntimeManager { self.handles.read().await.is_empty() } + /// Sorted server names with an in-flight `mcp-pending:` entry in + /// `auth.json`. Lets `mcp status` surface "you started a login but + /// haven't finished" — including for servers no longer in config + /// (caller cross-references against `statuses()` to spot orphans). + /// Synchronous filesystem read on the same thread as the caller; the + /// pending map is tiny (~one entry per concurrent login) so blocking is + /// trivial and avoids tokio::task::spawn_blocking overhead. + pub fn pending_logins(&self) -> Vec { + list_pending_logins_at(&self.auth_path) + } + /// Clone the live MCP client handle for `name` out from under a short /// read lock. The caller `.await`s on the returned `Arc` with no /// runtime lock held, so background writers (idle eviction, new @@ -398,7 +409,7 @@ fn provider_name_of(provider: &ResolvedProvider) -> String { /// `auth.json` key for an in-flight paste-login (ADR §6.4 namespace). fn pending_key(name: &str) -> String { - format!("mcp-pending:{name}") + format!("{PENDING_PREFIX}{name}") } /// Wall-clock seconds since Unix epoch. 
Saturates at 0 if the clock is @@ -894,6 +905,20 @@ mod tests { assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); } + #[tokio::test] + async fn pending_logins_returns_sorted_names_and_includes_orphans() { + // `linear` is in cfg; `zed-mcp` + `ghost` are not — surfacing all + // three is the point (orphans get separately filed by cli_show_status). + let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + assert!(mgr.pending_logins().is_empty()); + seed_pending(&mgr, "zed-mcp", "s1"); + seed_pending(&mgr, "linear", "s2"); + seed_pending(&mgr, "ghost", "s3"); + let names = mgr.pending_logins(); + assert_eq!(names, vec!["ghost", "linear", "zed-mcp"]); + } + #[tokio::test] async fn finish_login_tolerates_provider_omitting_refresh_token() { let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); From ece4eaf5f2bf9ec3937c436d92a796fa4701b831 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 07:50:57 +0000 Subject: [PATCH 76/98] refactor(openab-agent/mcp): single pending-key constructor + cli_show_status one-pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify pass on 9bca3de: - auth.rs: PENDING_PREFIX demoted to private; pending_key() promoted to pub fn as the single public constructor. runtime.rs uses it instead of formatting against the const, so the namespace literal lives in exactly one place. - mcp/mod.rs: cli_show_status folds the orphan calculation into the print loop via pending.remove(name) — the leftover set IS the orphans, dropping the second configured: HashSet<&str> pass. - mcp/mod.rs: drop the cli_login "Security note" docstring — the same rationale lives on the Login clap subcommand doc in main.rs, which is the user-facing surface. - runtime.rs: trim the spawn_blocking rationale on pending_logins — moved next to list_pending_logins_at where the sync read actually lives. 
Net -10 LOC across 3 files; ~/ops/openab-agent-ci.sh --quick green. --- openab-agent/src/auth.rs | 16 ++++++++++++---- openab-agent/src/mcp/mod.rs | 24 +++++++----------------- openab-agent/src/mcp/runtime.rs | 12 ++---------- 3 files changed, 21 insertions(+), 31 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 258d22fc4..ffd753767 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -197,16 +197,24 @@ pub fn save_namespaced_token_at(path: &Path, key: &str, store: &TokenStore) -> R write_auth_file(path, &map) } -/// `auth.json` namespace prefix for in-flight paste-back logins (ADR §6.4). -/// Pinned as a constant so `pending_key` (write side) and -/// `list_pending_logins_at` (read side) can't drift on the string literal. #[cfg(feature = "mcp")] -pub const PENDING_PREFIX: &str = "mcp-pending:"; +const PENDING_PREFIX: &str = "mcp-pending:"; + +/// `auth.json` key for an in-flight paste-back login (ADR §6.4 namespace). +/// Single construction site so read/write callers can't drift on the literal. +#[cfg(feature = "mcp")] +pub fn pending_key(name: &str) -> String { + format!("{PENDING_PREFIX}{name}") +} /// Enumerate the server names of all in-flight `mcp-pending:` entries /// — surfaces partially completed paste-back logins to `mcp status`. Returns /// sorted names with the prefix stripped. Missing / unreadable `auth.json` /// → empty Vec; this is a best-effort status view, not a load-bearing path. +/// +/// Synchronous filesystem read: the pending map is tiny (~one entry per +/// concurrent login), so blocking is trivial and avoids `spawn_blocking` +/// overhead — callers may invoke this from an async context directly. 
#[cfg(feature = "mcp")] pub fn list_pending_logins_at(path: &Path) -> Vec { let Ok(map) = read_auth_file(path) else { diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index f6f684e51..b9d6c8be3 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -139,10 +139,11 @@ pub async fn cli_show_status() { return; } let statuses = manager.statuses().await; - let pending: std::collections::HashSet = manager.pending_logins().into_iter().collect(); + let mut pending: std::collections::HashSet = + manager.pending_logins().into_iter().collect(); for (name, status) in &statuses { let mut line = format!("{} {name}", status.icon()); - if pending.contains(name) { + if pending.remove(name) { line.push_str(&format!( " (login pending — run `mcp login {name}` to finish)" )); @@ -151,18 +152,12 @@ pub async fn cli_show_status() { } println!("{line}"); } - let configured: std::collections::HashSet<&str> = - statuses.iter().map(|(n, _)| n.as_str()).collect(); - let orphans: Vec<&String> = pending - .iter() - .filter(|n| !configured.contains(n.as_str())) - .collect(); - if !orphans.is_empty() { + if !pending.is_empty() { println!(); println!("Orphaned pending logins (no matching server in mcp.json):"); - let mut sorted = orphans; - sorted.sort(); - for name in sorted { + let mut orphans: Vec = pending.into_iter().collect(); + orphans.sort(); + for name in orphans { println!(" ⏳ {name}"); } } @@ -198,11 +193,6 @@ pub async fn cli_connect(name: String) { /// Errors at any step exit non-zero; the pending entry is preserved on /// state-mismatch / network failure so the user can retry with a fresh /// paste of the same redirect URL without re-running this command. -/// -/// Security note: for non-interactive use, prefer piping the redirect -/// URL via stdin (`echo "" | openab-agent mcp login `) over -/// `--paste`. PKCE makes either route safe in theory, but pipes leave -/// no trace in shell history or `ps` output — defense-in-depth. 
pub async fn cli_login(name: String, paste: Option) { let manager = McpRuntimeManager::from_config(load_config_or_exit()); let start = match manager.start_paste_login(&name).await { diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 4c1eb4131..f259444ac 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -27,8 +27,8 @@ use super::config::{McpConfig, ServerConfig}; use super::flow::{init_paste_authorize, parse_paste_callback}; use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; use crate::auth::{ - auth_path, list_pending_logins_at, load_pending_login, remove_pending_login, - save_namespaced_token_at, save_pending_login, PendingPasteLogin, TokenStore, PENDING_PREFIX, + auth_path, list_pending_logins_at, load_pending_login, pending_key, remove_pending_login, + save_namespaced_token_at, save_pending_login, PendingPasteLogin, TokenStore, }; #[derive(Debug, Clone, PartialEq, Eq)] @@ -141,9 +141,6 @@ impl McpRuntimeManager { /// `auth.json`. Lets `mcp status` surface "you started a login but /// haven't finished" — including for servers no longer in config /// (caller cross-references against `statuses()` to spot orphans). - /// Synchronous filesystem read on the same thread as the caller; the - /// pending map is tiny (~one entry per concurrent login) so blocking is - /// trivial and avoids tokio::task::spawn_blocking overhead. pub fn pending_logins(&self) -> Vec { list_pending_logins_at(&self.auth_path) } @@ -407,11 +404,6 @@ fn provider_name_of(provider: &ResolvedProvider) -> String { } } -/// `auth.json` key for an in-flight paste-login (ADR §6.4 namespace). -fn pending_key(name: &str) -> String { - format!("{PENDING_PREFIX}{name}") -} - /// Wall-clock seconds since Unix epoch. Saturates at 0 if the clock is /// pre-epoch (would only happen on a misconfigured container). 
fn now_secs() -> u64 { From e5bc95195e1149f9b24fbe65b9f4bb88ad077aa0 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 08:04:18 +0000 Subject: [PATCH 77/98] feat(openab-agent/mcp): connect() injects cached bearer for HTTP+oauth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 / ADR §6.5: HTTP+oauth servers now dial with their cached access_token when one is present and not within REFRESH_SKEW_SECONDS of expiry. Refresh-token rotation for the expired-but-recoverable case is the next slice — for now, expired tokens fall through to NeedsAuth alongside missing tokens, so `mcp login` stays the single user-actionable path. - auth::is_expired now pub(crate) — single source of truth for the expiry+skew check across codex (get_valid_token) and mcp (connect). - Dial::Http carries Option<String> auth; Dial::run uses StreamableHttpClientTransportConfig::with_uri(url).auth_header(token) when present, falls back to from_uri for anonymous HTTP. - connect() oauth branch tries load_namespaced_token_at first; the Ok+!expired arm builds an authenticated Dial, all other arms bounce to NeedsAuth. Tests: - connect_oauth_with_valid_cached_token_attempts_dial_not_needs_auth proves the bearer-injection path reaches handshake (fails at the hermetic 127.0.0.1:1 dead address, surfaces Failed not NeedsAuth). - connect_oauth_with_expired_token_bounces_to_needs_auth pins the expiry guard against future refresh-flow regressions.
--- openab-agent/src/auth.rs | 2 +- openab-agent/src/mcp/runtime.rs | 119 +++++++++++++++++++++++++++----- 2 files changed, 103 insertions(+), 18 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index ffd753767..e5accc68d 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -295,7 +295,7 @@ pub fn remove_namespaced_token(key: &str) -> Result<()> { write_auth_file(&path, &map) } -fn is_expired(store: &TokenStore) -> bool { +pub(crate) fn is_expired(store: &TokenStore) -> bool { let now = SystemTime::now() .duration_since(UNIX_EPOCH) .unwrap_or_default() diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index f259444ac..f6afa9c12 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use rmcp::service::{RoleClient, RunningService}; +use rmcp::transport::streamable_http_client::StreamableHttpClientTransportConfig; use rmcp::transport::{ConfigureCommandExt, StreamableHttpClientTransport, TokioChildProcess}; use rmcp::ServiceExt; use tokio::process::Command; @@ -27,8 +28,9 @@ use super::config::{McpConfig, ServerConfig}; use super::flow::{init_paste_authorize, parse_paste_callback}; use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; use crate::auth::{ - auth_path, list_pending_logins_at, load_pending_login, pending_key, remove_pending_login, - save_namespaced_token_at, save_pending_login, PendingPasteLogin, TokenStore, + auth_path, is_expired, list_pending_logins_at, load_namespaced_token_at, load_pending_login, + pending_key, remove_pending_login, save_namespaced_token_at, save_pending_login, + PendingPasteLogin, TokenStore, }; #[derive(Debug, Clone, PartialEq, Eq)] @@ -350,19 +352,30 @@ impl McpRuntimeManager { ServerConfig::Stdio { command, args, env, .. 
} => Dial::Stdio { command, args, env }, - // Oauth-protected servers can't be dialed via plain connect; - // mark `NeedsAuth` so `mcp status` shows a persistent - // "waiting for login" signal (vs `Disconnected`, which - // implies a plain `connect` would succeed). The `Failed` - // path remains reserved for dials that were attempted and - // failed at handshake. - ServerConfig::Http { oauth: Some(_), .. } => { - handle.status = ServerStatus::NeedsAuth; - return Err(anyhow!( - "mcp server {name:?} needs oauth login — run `mcp login {name}`" - )); - } - ServerConfig::Http { url, .. } => Dial::Http { url }, + // Oauth-protected: dial with a cached bearer if one exists + // and is still within the refresh skew; otherwise bounce to + // `NeedsAuth` so `mcp status` shows a persistent "waiting + // for login" signal (vs `Disconnected`, which implies a + // plain `connect` would succeed). Refresh-token rotation + // for the expired-but-recoverable case is the next slice; + // for now an expired token falls through to NeedsAuth. + ServerConfig::Http { + url, + oauth: Some(_), + .. + } => match load_namespaced_token_at(&self.auth_path, name) { + Ok(store) if !is_expired(&store) => Dial::Http { + url, + auth: Some(store.access_token), + }, + _ => { + handle.status = ServerStatus::NeedsAuth; + return Err(anyhow!( + "mcp server {name:?} needs oauth login — run `mcp login {name}`" + )); + } + }, + ServerConfig::Http { url, .. } => Dial::Http { url, auth: None }, }; handle.status = ServerStatus::Connecting; dial @@ -472,6 +485,8 @@ enum Dial { }, Http { url: String, + /// Bearer token for oauth-protected servers; `None` for anonymous HTTP. 
+ auth: Option, }, } @@ -490,8 +505,15 @@ impl Dial { .await .with_context(|| format!("mcp handshake with {command:?}")) } - Dial::Http { url } => { - let transport = StreamableHttpClientTransport::from_uri(url.as_str()); + Dial::Http { url, auth } => { + let transport = match auth { + Some(token) => { + let cfg = StreamableHttpClientTransportConfig::with_uri(url.as_str()) + .auth_header(token); + StreamableHttpClientTransport::from_config(cfg) + } + None => StreamableHttpClientTransport::from_uri(url.as_str()), + }; ().serve(transport) .await .with_context(|| format!("mcp handshake with {url:?}")) @@ -911,6 +933,69 @@ mod tests { assert_eq!(names, vec!["ghost", "linear", "zed-mcp"]); } + fn dead_oauth_cfg() -> &'static str { + // 127.0.0.1:1 dials hermetically (no reachable MCP server) so + // tests can prove the connect() reached the dial — i.e. the + // oauth branch didn't short-circuit at NeedsAuth — without any + // network round-trip. + r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "http://127.0.0.1:1/mcp", + "oauth": { + "provider": "linear", + "authorize_url": "https://linear.app/oauth/authorize", + "token_url": "https://api.linear.app/oauth/token", + "client_id": "linear-client", + "scopes": ["read"] + } + } + } + }"# + } + + fn seed_token(mgr: &McpRuntimeManager, name: &str, expires_at: u64) { + let store = TokenStore { + access_token: format!("atok-{name}"), + refresh_token: "rtok".to_string(), + expires_at, + token_endpoint: "https://api.linear.app/oauth/token".to_string(), + provider: "linear".to_string(), + }; + save_namespaced_token_at(&mgr.auth_path, name, &store).unwrap(); + } + + #[tokio::test] + async fn connect_oauth_with_valid_cached_token_attempts_dial_not_needs_auth() { + // Valid token cached → connect() must NOT bounce at NeedsAuth. + // Dial reaches the dead address and fails at handshake — that + // failure surface is the proof the bearer was injected. 
+ let cfg: McpConfig = serde_json::from_str(dead_oauth_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + seed_token(&mgr, "linear", u64::MAX); + let err = mgr.connect("linear").await.unwrap_err().to_string(); + assert!(err.contains("handshake"), "expected 'handshake' in {err}"); + match &mgr.statuses().await[0].1 { + ServerStatus::Failed(_) => {} + other => panic!("expected Failed, got {other:?}"), + } + } + + #[tokio::test] + async fn connect_oauth_with_expired_token_bounces_to_needs_auth() { + // Expired token (within REFRESH_SKEW_SECONDS of now, or past) must + // NOT be sent — refresh-rotation is a later slice. Treat as + // missing: bounce to NeedsAuth so `mcp login` is the actionable + // path the user sees. + let cfg: McpConfig = serde_json::from_str(dead_oauth_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + seed_token(&mgr, "linear", 0); + let err = mgr.connect("linear").await.unwrap_err().to_string(); + assert!(err.contains("needs oauth login"), "got: {err}"); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + } + #[tokio::test] async fn finish_login_tolerates_provider_omitting_refresh_token() { let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); From 724feece86d541340cc08676d800278ed0baabf8 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 08:14:41 +0000 Subject: [PATCH 78/98] feat(openab-agent/mcp): refresh-token rotation in connect() oauth branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 / ADR §6.6: HTTP+oauth servers with an expired cached token now attempt RFC 6749 §6 refresh-grant before bouncing to NeedsAuth. On success the rotated TokenStore is fsynced and the dial proceeds with the new bearer; on failure the bounce is preserved so `mcp login` stays the user-actionable path. - post_token_refresh: sibling of post_token_exchange for grant_type= refresh_token (public client, no client_secret). 
- try_refresh_oauth_token: resolves client_id from current config (so a rotated catalog entry is picked up), POSTs the refresh, builds a new TokenStore preserving the previous refresh_token if the provider omits one (Google-style rotation), persists via save_namespaced_token_at. - DialPlan two-phase enum lets the inside-lock branch tag "needs refresh" and defer the async refresh to outside the write lock — slow refresh must not block concurrent `mcp status` reads. Tests: - connect_oauth_expired_no_refresh_token_bounces_to_needs_auth pins the empty-refresh short-circuit (no POST attempted). - connect_oauth_expired_with_refresh_token_failed_refresh_bounces_to_needs_auth proves the refresh path runs and that any failure surfaces as user-actionable NeedsAuth (current fixture's custom provider plus dead token endpoint both fail the refresh). Custom-provider refresh shares the same gap as custom-provider login — resolve_paste_client errors before the refresh POST runs. Will be unblocked by the same future slice that wires custom-provider login. --- openab-agent/src/mcp/runtime.rs | 165 +++++++++++++++++++++++++++----- 1 file changed, 142 insertions(+), 23 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index f6afa9c12..f2c996206 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -333,13 +333,46 @@ impl McpRuntimeManager { Ok((provider, client_id, redirect_uri)) } + /// RFC 6749 §6 refresh-grant — exchange a cached `refresh_token` for a + /// new `access_token`. Resolves `client_id` from current config (so a + /// rotated builtin catalog entry is picked up automatically). Per + /// ADR §6.6 rotation contract: if the provider omits a new + /// `refresh_token` in the response, the previous one is preserved + /// (Google-style rotation); the agent fsyncs `auth.json` before + /// returning so deployment-side mtime watchers can sync the rotated + /// token to peer replicas. 
+ async fn try_refresh_oauth_token(&self, name: &str, store: &TokenStore) -> Result { + if store.refresh_token.is_empty() { + return Err(anyhow!("no refresh_token cached for {name:?}")); + } + let (_provider, client_id, _redirect_uri) = self.resolve_paste_client(name).await?; + let resp = + post_token_refresh(&store.token_endpoint, &client_id, &store.refresh_token).await?; + let new_refresh = resp + .refresh_token + .unwrap_or_else(|| store.refresh_token.clone()); + let expires_at = match resp.expires_in { + Some(secs) => now_secs() + secs, + None => u64::MAX, + }; + let new_store = TokenStore { + access_token: resp.access_token, + refresh_token: new_refresh, + expires_at, + token_endpoint: store.token_endpoint.clone(), + provider: store.provider.clone(), + }; + save_namespaced_token_at(&self.auth_path, name, &new_store)?; + Ok(new_store) + } + /// Lazy-connect the named server (ADR §5.7). Idempotent if already /// `Connected` with a live client. HTTP servers with an `oauth:` block /// are routed through `mcp login` first — `connect` marks them /// `NeedsAuth` and returns an error pointing the caller at the login /// subcommand rather than attempting an unauthenticated dial. pub async fn connect(&self, name: &str) -> Result<()> { - let dial = { + let plan = { let mut guard = self.handles.write().await; let handle = guard .get_mut(name) @@ -348,26 +381,27 @@ impl McpRuntimeManager { return Ok(()); } let resolved = handle.config.resolved(name)?; - let dial = match resolved { + let plan = match resolved { ServerConfig::Stdio { command, args, env, .. - } => Dial::Stdio { command, args, env }, - // Oauth-protected: dial with a cached bearer if one exists - // and is still within the refresh skew; otherwise bounce to - // `NeedsAuth` so `mcp status` shows a persistent "waiting - // for login" signal (vs `Disconnected`, which implies a - // plain `connect` would succeed). 
Refresh-token rotation - // for the expired-but-recoverable case is the next slice; - // for now an expired token falls through to NeedsAuth. + } => DialPlan::Dial(Dial::Stdio { command, args, env }), + // Oauth-protected: cached-valid → dial; expired but with a + // refresh_token → defer to outside-lock async refresh + // (`DialPlan::NeedsRefresh`); missing/expired-no-refresh + // → bounce to `NeedsAuth` so `mcp login` stays the user- + // actionable path. ServerConfig::Http { url, oauth: Some(_), .. } => match load_namespaced_token_at(&self.auth_path, name) { - Ok(store) if !is_expired(&store) => Dial::Http { + Ok(store) if !is_expired(&store) => DialPlan::Dial(Dial::Http { url, auth: Some(store.access_token), - }, + }), + Ok(store) if !store.refresh_token.is_empty() => { + DialPlan::NeedsRefresh { url, store } + } _ => { handle.status = ServerStatus::NeedsAuth; return Err(anyhow!( @@ -375,10 +409,34 @@ impl McpRuntimeManager { )); } }, - ServerConfig::Http { url, .. } => Dial::Http { url, auth: None }, + ServerConfig::Http { url, .. } => DialPlan::Dial(Dial::Http { url, auth: None }), }; handle.status = ServerStatus::Connecting; - dial + plan + }; + + // Resolve `NeedsRefresh` outside the write lock so a slow refresh + // doesn't block concurrent `mcp status` reads. Failed refresh → + // `NeedsAuth` (matching the missing-token bounce inside the lock). 
+ let dial = match plan { + DialPlan::Dial(d) => d, + DialPlan::NeedsRefresh { url, store } => { + match self.try_refresh_oauth_token(name, &store).await { + Ok(new_store) => Dial::Http { + url, + auth: Some(new_store.access_token), + }, + Err(e) => { + let mut guard = self.handles.write().await; + if let Some(h) = guard.get_mut(name) { + h.status = ServerStatus::NeedsAuth; + } + return Err(anyhow!( + "mcp server {name:?} oauth refresh failed: {e:#} — run `mcp login {name}`" + )); + } + } + } }; let dial_result = dial.run().await; @@ -474,6 +532,44 @@ async fn post_token_exchange( serde_json::from_str(&body).map_err(|e| anyhow!("invalid token response: {e}; body={body}")) } +/// POST a refresh-grant to the OAuth 2.1 token endpoint per RFC 6749 §6. +/// Public client — no `client_secret`. Same response shape as the +/// auth-code exchange (`TokenExchangeResponse`). +async fn post_token_refresh( + token_url: &str, + client_id: &str, + refresh_token: &str, +) -> Result { + let client = reqwest::Client::builder() + .build() + .context("build reqwest client")?; + let resp = client + .post(token_url) + .form(&[ + ("grant_type", "refresh_token"), + ("refresh_token", refresh_token), + ("client_id", client_id), + ]) + .send() + .await + .with_context(|| format!("POST {token_url} (token refresh)"))?; + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + if !status.is_success() { + return Err(anyhow!("token endpoint returned {status}: {body}")); + } + serde_json::from_str(&body).map_err(|e| anyhow!("invalid token response: {e}; body={body}")) +} + +/// Two-phase plan for `connect()`: most server types resolve directly to +/// a `Dial`, but HTTP+oauth with an expired-but-refreshable token needs +/// async work (the refresh POST) before a `Dial` can be built. Keeping +/// the variant lets us release the write lock before the refresh. 
+enum DialPlan { + Dial(Dial), + NeedsRefresh { url: String, store: TokenStore }, +} + /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. @@ -955,17 +1051,26 @@ mod tests { }"# } - fn seed_token(mgr: &McpRuntimeManager, name: &str, expires_at: u64) { + fn seed_token_with_refresh( + mgr: &McpRuntimeManager, + name: &str, + expires_at: u64, + refresh_token: &str, + ) { let store = TokenStore { access_token: format!("atok-{name}"), - refresh_token: "rtok".to_string(), + refresh_token: refresh_token.to_string(), expires_at, - token_endpoint: "https://api.linear.app/oauth/token".to_string(), + token_endpoint: "http://127.0.0.1:1/token".to_string(), provider: "linear".to_string(), }; save_namespaced_token_at(&mgr.auth_path, name, &store).unwrap(); } + fn seed_token(mgr: &McpRuntimeManager, name: &str, expires_at: u64) { + seed_token_with_refresh(mgr, name, expires_at, "rtok"); + } + #[tokio::test] async fn connect_oauth_with_valid_cached_token_attempts_dial_not_needs_auth() { // Valid token cached → connect() must NOT bounce at NeedsAuth. @@ -983,19 +1088,33 @@ mod tests { } #[tokio::test] - async fn connect_oauth_with_expired_token_bounces_to_needs_auth() { - // Expired token (within REFRESH_SKEW_SECONDS of now, or past) must - // NOT be sent — refresh-rotation is a later slice. Treat as - // missing: bounce to NeedsAuth so `mcp login` is the actionable - // path the user sees. + async fn connect_oauth_expired_no_refresh_token_bounces_to_needs_auth() { + // Expired token + empty refresh_token → no refresh attempt; + // bounce directly to NeedsAuth. Proves the empty-refresh guard + // short-circuits before the refresh POST. 
let cfg: McpConfig = serde_json::from_str(dead_oauth_cfg()).unwrap(); let (mgr, _dir) = mgr_with_tempdir(cfg); - seed_token(&mgr, "linear", 0); + seed_token_with_refresh(&mgr, "linear", 0, ""); let err = mgr.connect("linear").await.unwrap_err().to_string(); assert!(err.contains("needs oauth login"), "got: {err}"); assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); } + #[tokio::test] + async fn connect_oauth_expired_with_refresh_token_failed_refresh_bounces_to_needs_auth() { + // Expired token + non-empty refresh_token → refresh attempted; + // refresh fails (custom-provider not yet supported in this slice, + // or dead token_endpoint) → NeedsAuth bounce with refresh-failed + // message. Proves the refresh path runs and that any failure + // surfaces as user-actionable NeedsAuth. + let cfg: McpConfig = serde_json::from_str(dead_oauth_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + seed_token(&mgr, "linear", 0); + let err = mgr.connect("linear").await.unwrap_err().to_string(); + assert!(err.contains("oauth refresh failed"), "got: {err}"); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + } + #[tokio::test] async fn finish_login_tolerates_provider_omitting_refresh_token() { let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); From 5ac278150e57d893e74f6e8afe0c54daf29f4bd2 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 08:16:47 +0000 Subject: [PATCH 79/98] fix(openab-agent/mcp): refresh-failed bounce respects concurrent Connected winner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mira + Kirin caught a race: two concurrent connect() calls on the same expired-token server can interleave so one wins the refresh + dial while the other's refresh fails — and the loser was unconditionally overwriting `Connected` with `NeedsAuth`, giving the user a false "please log in" signal on a healthy session. 
Guard the status mutation with `!matches!(h.status, Connected)` so a concurrent winner's terminal state is preserved. --- openab-agent/src/mcp/runtime.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index f2c996206..4b0b2dd18 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -429,7 +429,13 @@ impl McpRuntimeManager { Err(e) => { let mut guard = self.handles.write().await; if let Some(h) = guard.get_mut(name) { - h.status = ServerStatus::NeedsAuth; + // A concurrent connect() may have refreshed + + // dialed successfully while we were awaiting + // our (failed) refresh. Don't clobber the + // winner's Connected status with NeedsAuth. + if !matches!(h.status, ServerStatus::Connected) { + h.status = ServerStatus::NeedsAuth; + } } return Err(anyhow!( "mcp server {name:?} oauth refresh failed: {e:#} — run `mcp login {name}`" From 1e4863ffa030a90d31bf9efed656e2e44924177b Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 08:31:03 +0000 Subject: [PATCH 80/98] fix(openab-agent/mcp): saturating_add for refresh expires_at Mirrors finish_login (oauth.rs:271) so a hostile/buggy provider returning a huge expires_in can't trigger a debug-build overflow panic in the refresh-token rotation path. 
--- openab-agent/src/mcp/runtime.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 4b0b2dd18..31aef9748 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -352,7 +352,7 @@ impl McpRuntimeManager { .refresh_token .unwrap_or_else(|| store.refresh_token.clone()); let expires_at = match resp.expires_in { - Some(secs) => now_secs() + secs, + Some(secs) => now_secs().saturating_add(secs), None => u64::MAX, }; let new_store = TokenStore { From bf08fe27f8e2433d3e2c181131ce91ab79e4acf5 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 08:32:06 +0000 Subject: [PATCH 81/98] refactor(openab-agent/mcp): share post_token_form between exchange and refresh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit post_token_exchange and post_token_refresh diverged only in the form payload — same Client::builder() boilerplate, same status/body folding, same response shape. Extract post_token_form so each grant call site is just the RFC 6749 form tuple + a grant_label for the error context. Also drops a narrating comment in connect() that recapped the three match arms below it without adding any non-obvious context. --- openab-agent/src/mcp/runtime.rs | 74 ++++++++++++++++----------------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 31aef9748..c9862a329 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -385,11 +385,6 @@ impl McpRuntimeManager { ServerConfig::Stdio { command, args, env, .. 
} => DialPlan::Dial(Dial::Stdio { command, args, env }), - // Oauth-protected: cached-valid → dial; expired but with a - // refresh_token → defer to outside-lock async refresh - // (`DialPlan::NeedsRefresh`); missing/expired-no-refresh - // → bounce to `NeedsAuth` so `mcp login` stays the user- - // actionable path. ServerConfig::Http { url, oauth: Some(_), @@ -504,32 +499,24 @@ struct TokenExchangeResponse { expires_in: Option, } -/// POST the auth code to the OAuth 2.1 token endpoint per RFC 6749 -/// §4.1.3 + RFC 7636 §4.5 (PKCE verifier). Public client — no +/// Shared POST helper for both `post_token_exchange` (RFC 6749 §4.1.3) +/// and `post_token_refresh` (RFC 6749 §6). Public client — no /// `client_secret`. Errors fold body text into the message so transient /// 4xx from the provider land in the user's terminal verbatim. -async fn post_token_exchange( +async fn post_token_form( token_url: &str, - client_id: &str, - redirect_uri: &str, - code: &str, - code_verifier: &str, + form: &[(&str, &str)], + grant_label: &str, ) -> Result { let client = reqwest::Client::builder() .build() .context("build reqwest client")?; let resp = client .post(token_url) - .form(&[ - ("grant_type", "authorization_code"), - ("code", code), - ("code_verifier", code_verifier), - ("client_id", client_id), - ("redirect_uri", redirect_uri), - ]) + .form(form) .send() .await - .with_context(|| format!("POST {token_url} (token exchange)"))?; + .with_context(|| format!("POST {token_url} ({grant_label})"))?; let status = resp.status(); let body = resp.text().await.unwrap_or_default(); if !status.is_success() { @@ -538,33 +525,42 @@ async fn post_token_exchange( serde_json::from_str(&body).map_err(|e| anyhow!("invalid token response: {e}; body={body}")) } -/// POST a refresh-grant to the OAuth 2.1 token endpoint per RFC 6749 §6. -/// Public client — no `client_secret`. Same response shape as the -/// auth-code exchange (`TokenExchangeResponse`). 
+async fn post_token_exchange( + token_url: &str, + client_id: &str, + redirect_uri: &str, + code: &str, + code_verifier: &str, +) -> Result { + post_token_form( + token_url, + &[ + ("grant_type", "authorization_code"), + ("code", code), + ("code_verifier", code_verifier), + ("client_id", client_id), + ("redirect_uri", redirect_uri), + ], + "token exchange", + ) + .await +} + async fn post_token_refresh( token_url: &str, client_id: &str, refresh_token: &str, ) -> Result { - let client = reqwest::Client::builder() - .build() - .context("build reqwest client")?; - let resp = client - .post(token_url) - .form(&[ + post_token_form( + token_url, + &[ ("grant_type", "refresh_token"), ("refresh_token", refresh_token), ("client_id", client_id), - ]) - .send() - .await - .with_context(|| format!("POST {token_url} (token refresh)"))?; - let status = resp.status(); - let body = resp.text().await.unwrap_or_default(); - if !status.is_success() { - return Err(anyhow!("token endpoint returned {status}: {body}")); - } - serde_json::from_str(&body).map_err(|e| anyhow!("invalid token response: {e}; body={body}")) + ], + "token refresh", + ) + .await } /// Two-phase plan for `connect()`: most server types resolve directly to From 27abdbfcd5a501b9c5ede1bd6a6b98afbbaa699a Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 08:44:26 +0000 Subject: [PATCH 82/98] feat(openab-agent/mcp): device-code OAuth POST primitives (RFC 8628) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the §3.1 device authorization request + §3.4 polling tick HTTP helpers — pure data plumbing, no runtime wiring yet. The polling-tick response classifier is split out as a pure fn (classify_device_poll) so the §3.5 error-code → flow-state mapping (authorization_pending, slow_down, access_denied, expired_token) is unit-testable without a mock HTTP server. DeviceAuthResponse defaults interval to 5s per RFC 8628 §3.5 when the provider omits it. 
verification_uri_complete (§3.3.1 extension) is Option since verification_uri + user_code is the always-present fallback. Both POST fns + the two types are #[allow(dead_code)] — next slice wires start_device_login() and the polling loop into McpRuntimeManager. --- openab-agent/src/mcp/runtime.rs | 182 ++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index c9862a329..db338b8c3 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -563,6 +563,124 @@ async fn post_token_refresh( .await } +/// RFC 8628 §3.2 device authorization response. `verification_uri_complete` +/// is the §3.3.1 extension (`verification_uri` + `user_code` is the always- +/// present fallback the agent relays to the user). `interval` defaults to +/// 5s per RFC 8628 §3.5 when omitted by the provider. +#[derive(Debug, serde::Deserialize)] +#[allow(dead_code)] +struct DeviceAuthResponse { + device_code: String, + user_code: String, + verification_uri: String, + #[serde(default)] + verification_uri_complete: Option, + expires_in: u64, + #[serde(default = "default_device_poll_interval")] + interval: u64, +} + +fn default_device_poll_interval() -> u64 { + 5 +} + +/// RFC 8628 §3.5 polling outcome. The four named "errors" +/// (`authorization_pending`, `slow_down`, `access_denied`, `expired_token`) +/// are flow-level states NOT real failures — they drive the polling loop. +/// Everything else folds into a fatal `Err` at the call site. +#[derive(Debug)] +#[allow(dead_code)] +enum DevicePollOutcome { + Success(TokenExchangeResponse), + AuthorizationPending, + SlowDown, + AccessDenied, + ExpiredToken, +} + +/// Pure response classifier — split from the HTTP path so the RFC 8628 +/// §3.5 error-code mapping is unit-testable without a mock server. 
2xx +/// parses as a token response; 4xx parses `{"error": "..."}` and maps the +/// four flow-state codes to enum variants; everything else (including +/// non-JSON / unknown error codes) folds into `Err`. +fn classify_device_poll(status: reqwest::StatusCode, body: &str) -> Result { + if status.is_success() { + return serde_json::from_str(body) + .map(DevicePollOutcome::Success) + .map_err(|e| anyhow!("invalid token response: {e}; body={body}")); + } + #[derive(serde::Deserialize)] + struct ErrBody { + error: String, + } + let err_code = serde_json::from_str::(body).ok().map(|e| e.error); + match err_code.as_deref() { + Some("authorization_pending") => Ok(DevicePollOutcome::AuthorizationPending), + Some("slow_down") => Ok(DevicePollOutcome::SlowDown), + Some("access_denied") => Ok(DevicePollOutcome::AccessDenied), + Some("expired_token") => Ok(DevicePollOutcome::ExpiredToken), + _ => Err(anyhow!("token endpoint returned {status}: {body}")), + } +} + +/// POST to the RFC 8628 §3.1 device authorization endpoint. Public client +/// — no `client_secret`. Returns the `{device_code, user_code, ...}` +/// bundle the runtime relays to the user and polls against the token +/// endpoint via `post_device_token_poll`. 
+#[allow(dead_code)] +async fn post_device_authorization( + device_endpoint: &str, + client_id: &str, + scopes: &str, +) -> Result { + let client = reqwest::Client::builder() + .build() + .context("build reqwest client")?; + let resp = client + .post(device_endpoint) + .form(&[("client_id", client_id), ("scope", scopes)]) + .send() + .await + .with_context(|| format!("POST {device_endpoint} (device authorization)"))?; + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + if !status.is_success() { + return Err(anyhow!( + "device authorization endpoint returned {status}: {body}" + )); + } + serde_json::from_str(&body) + .map_err(|e| anyhow!("invalid device authorization response: {e}; body={body}")) +} + +/// POST one polling tick to the token endpoint per RFC 8628 §3.4. Caller +/// owns the polling loop (interval, expires_in deadline, SlowDown back- +/// off). Returns a `DevicePollOutcome` so the loop can distinguish the +/// four RFC 8628 §3.5 flow states from real errors. +#[allow(dead_code)] +async fn post_device_token_poll( + token_url: &str, + client_id: &str, + device_code: &str, +) -> Result { + let client = reqwest::Client::builder() + .build() + .context("build reqwest client")?; + let resp = client + .post(token_url) + .form(&[ + ("grant_type", "urn:ietf:params:oauth:grant-type:device_code"), + ("device_code", device_code), + ("client_id", client_id), + ]) + .send() + .await + .with_context(|| format!("POST {token_url} (device token poll)"))?; + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + classify_device_poll(status, &body) +} + /// Two-phase plan for `connect()`: most server types resolve directly to /// a `Dial`, but HTTP+oauth with an expired-but-refreshable token needs /// async work (the refresh POST) before a `Dial` can be built. Keeping @@ -1136,4 +1254,68 @@ mod tests { // first use (Mira Tick 46 catch). 
assert_eq!(token.expires_at, u64::MAX); } + + #[test] + fn classify_device_poll_decodes_success_into_token() { + let body = r#"{"access_token": "atk", "refresh_token": "rtk", "expires_in": 3600}"#; + let outcome = classify_device_poll(reqwest::StatusCode::OK, body).unwrap(); + let DevicePollOutcome::Success(token) = outcome else { + panic!("expected Success"); + }; + assert_eq!(token.access_token, "atk"); + assert_eq!(token.refresh_token.as_deref(), Some("rtk")); + assert_eq!(token.expires_in, Some(3600)); + } + + #[test] + fn classify_device_poll_maps_rfc8628_flow_states() { + let cases = [ + ("authorization_pending", "AuthorizationPending"), + ("slow_down", "SlowDown"), + ("access_denied", "AccessDenied"), + ("expired_token", "ExpiredToken"), + ]; + for (code, want) in cases { + let body = format!(r#"{{"error": "{code}"}}"#); + let outcome = classify_device_poll(reqwest::StatusCode::BAD_REQUEST, &body).unwrap(); + let got = match outcome { + DevicePollOutcome::AuthorizationPending => "AuthorizationPending", + DevicePollOutcome::SlowDown => "SlowDown", + DevicePollOutcome::AccessDenied => "AccessDenied", + DevicePollOutcome::ExpiredToken => "ExpiredToken", + DevicePollOutcome::Success(_) => "Success", + }; + assert_eq!(got, want, "code={code}"); + } + } + + #[test] + fn classify_device_poll_folds_unknown_error_into_err() { + let body = r#"{"error": "invalid_grant"}"#; + let err = classify_device_poll(reqwest::StatusCode::BAD_REQUEST, body) + .unwrap_err() + .to_string(); + assert!(err.contains("invalid_grant"), "got: {err}"); + } + + #[test] + fn classify_device_poll_folds_non_json_5xx_into_err() { + let err = classify_device_poll(reqwest::StatusCode::INTERNAL_SERVER_ERROR, "") + .unwrap_err() + .to_string(); + assert!(err.contains("500"), "got: {err}"); + } + + #[test] + fn device_auth_response_defaults_interval_to_rfc8628_value() { + let body = r#"{ + "device_code": "dc", + "user_code": "AAAA-BBBB", + "verification_uri": "https://example.com/device", + 
"expires_in": 1800 + }"#; + let resp: DeviceAuthResponse = serde_json::from_str(body).unwrap(); + assert_eq!(resp.interval, 5); + assert!(resp.verification_uri_complete.is_none()); + } } From 9edf3a618a7818243e0ea6838ed4f37462d822fb Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 08:59:34 +0000 Subject: [PATCH 83/98] feat(openab-agent/mcp): device-flow runtime wiring (RFC 8628 polling loop) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `start_device_login(name)`: POST §3.1 + spawn detached §3.4 polling task - `DeviceLoginStart`: public user-facing bundle (user_code / verification_uri / verification_uri_complete / expires_in) - `resolve_device_client`: Custom-provider-only gate with explicit error for missing `device_authorization_endpoint` / `client_id` - `run_device_poll_loop`: deadline check + SlowDown back-off (+5s) + success persistence + NeedsAuth on terminal failure - `finalize_device_login` / `mark_device_login_failed`: status transitions (Disconnected on success keeps MCP handshake out of detached task; next `connect()` picks up the cached token via the oauth-aware DialPlan) - 4 gating tests: unknown server / stdio / missing device endpoint / HTTP reach #[allow(dead_code)] on the new surface until the next slice wires `mcp login --device` CLI. Quick gate green. --- openab-agent/src/mcp/runtime.rs | 295 ++++++++++++++++++++++++++++++++ 1 file changed, 295 insertions(+) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index db338b8c3..13056946f 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -85,6 +85,20 @@ pub struct PasteLoginStart { pub state: String, } +/// Public return of `start_device_login` (RFC 8628 §3.2 user-facing +/// bundle). 
`verification_uri_complete` is the §3.3.1 extension that +/// pre-fills the user_code into the QR/link target; clients should +/// prefer it when present and fall back to the +/// `verification_uri` + `user_code` pair. +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct DeviceLoginStart { + pub user_code: String, + pub verification_uri: String, + pub verification_uri_complete: Option, + pub expires_in: u64, +} + /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. #[derive(Debug, Clone)] @@ -287,6 +301,210 @@ impl McpRuntimeManager { Ok(()) } + /// Begin a device-code OAuth login (ADR §6.4 + RFC 8628) for an HTTP + /// server whose `oauth:` block declares a `device_authorization_endpoint` + /// (§6.3). Built-in providers don't yet ship device endpoints — that + /// requires a `ProviderSpec` schema extension (out of scope this slice). + /// + /// 1. POST RFC 8628 §3.1 device authorization → user_code + + /// verification_uri + interval + expires_in + /// 2. Spawn a detached `tokio::task` that drives the §3.4 polling loop, + /// persists the `TokenStore` on success, and writes server status + /// (`Disconnected` on success so the next `connect()` picks up the + /// cached token; `NeedsAuth` on terminal failure) + /// 3. Return the user-facing bundle (the polling task is fire-and- + /// forget — observed via `mcp status`) + /// + /// Choosing `Disconnected` over the ADR's "transitions to Connected" + /// keeps the polling task out of the MCP handshake path. The next + /// `connect()` reads the cached token via the oauth-aware `DialPlan` + /// branch and reaches `Connected` through the normal lifecycle. 
+ #[allow(dead_code)] + pub async fn start_device_login(&self, name: &str) -> Result { + let (device_endpoint, client_id, token_url, scopes, provider_name) = + self.resolve_device_client(name).await?; + let auth = + post_device_authorization(&device_endpoint, &client_id, &scopes.join(" ")).await?; + { + let mut handles = self.handles.write().await; + if let Some(handle) = handles.get_mut(name) { + handle.status = ServerStatus::Connecting; + } + } + let manager = self.clone(); + let name_owned = name.to_string(); + let device_code = auth.device_code.clone(); + let initial_interval = auth.interval; + let expires_in = auth.expires_in; + let token_url_owned = token_url; + let client_id_owned = client_id; + let provider_name_owned = provider_name; + tokio::spawn(async move { + manager + .run_device_poll_loop( + &name_owned, + &token_url_owned, + &client_id_owned, + &device_code, + &provider_name_owned, + initial_interval, + expires_in, + ) + .await; + }); + Ok(DeviceLoginStart { + user_code: auth.user_code, + verification_uri: auth.verification_uri, + verification_uri_complete: auth.verification_uri_complete, + expires_in: auth.expires_in, + }) + } + + /// Resolve `(device_endpoint, client_id, token_url, scopes, provider_name)` + /// for `name`. Rejects non-Http / non-oauth / built-in / missing-endpoint + /// configurations with explicit errors so the user sees what to fix in + /// `mcp.json`. + #[allow(dead_code)] + async fn resolve_device_client( + &self, + name: &str, + ) -> Result<(String, String, String, Vec, String)> { + let oauth_cfg = { + let guard = self.handles.read().await; + let handle = guard + .get(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + match handle.config.resolved(name)? { + ServerConfig::Http { + oauth: Some(oauth), .. + } => oauth, + ServerConfig::Http { oauth: None, .. } => { + return Err(anyhow!("mcp server {name:?} has no oauth block")); + } + ServerConfig::Stdio { .. 
} => { + return Err(anyhow!("mcp server {name:?} is stdio, not http+oauth")); + } + } + }; + let provider = resolve(&oauth_cfg)?; + let ResolvedProvider::Custom { + provider_name, + token_url, + client_id: Some(client_id), + device_authorization_endpoint: Some(device_endpoint), + scopes, + .. + } = provider + else { + return Err(anyhow!( + "mcp server {name:?} device-flow requires a Custom provider with \ + both `oauth.device_authorization_endpoint` and `oauth.client_id` \ + set in mcp.json" + )); + }; + Ok((device_endpoint, client_id, token_url, scopes, provider_name)) + } + + /// RFC 8628 §3.4 polling loop. Runs detached in `tokio::spawn`; the + /// only observable side-effect is `auth.json` (on Success) + the + /// `ServerHandle.status` transition. Errors are logged via `tracing` + /// and surface to the user via `mcp status` (Failed/NeedsAuth). + #[allow(dead_code, clippy::too_many_arguments)] + async fn run_device_poll_loop( + &self, + name: &str, + token_url: &str, + client_id: &str, + device_code: &str, + provider_name: &str, + initial_interval: u64, + expires_in_secs: u64, + ) { + let deadline = now_secs().saturating_add(expires_in_secs); + let mut interval = initial_interval; + loop { + tokio::time::sleep(std::time::Duration::from_secs(interval)).await; + if now_secs() >= deadline { + self.mark_device_login_failed( + name, + anyhow!("device-flow expired before user authorized"), + ) + .await; + return; + } + let outcome = match post_device_token_poll(token_url, client_id, device_code).await { + Ok(o) => o, + Err(e) => { + self.mark_device_login_failed(name, e).await; + return; + } + }; + match outcome { + DevicePollOutcome::Success(resp) => { + self.finalize_device_login(name, provider_name, token_url, resp) + .await; + return; + } + DevicePollOutcome::AuthorizationPending => continue, + DevicePollOutcome::SlowDown => { + // RFC 8628 §3.5: SlowDown means add 5s to the interval. 
+ interval = interval.saturating_add(5); + } + DevicePollOutcome::AccessDenied => { + self.mark_device_login_failed(name, anyhow!("device-flow denied by user")) + .await; + return; + } + DevicePollOutcome::ExpiredToken => { + self.mark_device_login_failed(name, anyhow!("device_code expired")) + .await; + return; + } + } + } + } + + /// Pure-persistence tail of `run_device_poll_loop` on RFC 8628 §3.5 + /// Success. Mirrors `finish_login`'s `u64::MAX` sentinel for absent + /// `expires_in` (Mira Tick 46 catch). + #[allow(dead_code)] + async fn finalize_device_login( + &self, + name: &str, + provider_name: &str, + token_url: &str, + resp: TokenExchangeResponse, + ) { + let expires_at = match resp.expires_in { + Some(secs) => now_secs().saturating_add(secs), + None => u64::MAX, + }; + let store = TokenStore { + access_token: resp.access_token, + refresh_token: resp.refresh_token.unwrap_or_default(), + expires_at, + token_endpoint: token_url.to_string(), + provider: provider_name.to_string(), + }; + if let Err(e) = save_namespaced_token_at(&self.auth_path, name, &store) { + self.mark_device_login_failed(name, e).await; + return; + } + let mut handles = self.handles.write().await; + if let Some(handle) = handles.get_mut(name) { + handle.status = ServerStatus::Disconnected; + } + } + + #[allow(dead_code)] + async fn mark_device_login_failed(&self, name: &str, err: anyhow::Error) { + tracing::warn!(server = %name, error = %err, "device-flow polling failed"); + let mut handles = self.handles.write().await; + if let Some(handle) = handles.get_mut(name) { + handle.status = ServerStatus::NeedsAuth; + } + } + /// Resolve a paste-back OAuth client `(provider, client_id, redirect_uri)` /// from the server's config. 
Shared by `start_paste_login` and /// `complete_login` so a config drift between init and finish surfaces @@ -580,6 +798,7 @@ struct DeviceAuthResponse { interval: u64, } +#[allow(dead_code)] fn default_device_poll_interval() -> u64 { 5 } @@ -603,6 +822,7 @@ enum DevicePollOutcome { /// parses as a token response; 4xx parses `{"error": "..."}` and maps the /// four flow-state codes to enum variants; everything else (including /// non-JSON / unknown error codes) folds into `Err`. +#[allow(dead_code)] fn classify_device_poll(status: reqwest::StatusCode, body: &str) -> Result { if status.is_success() { return serde_json::from_str(body) @@ -1318,4 +1538,79 @@ mod tests { assert_eq!(resp.interval, 5); assert!(resp.verification_uri_complete.is_none()); } + + fn linear_device_cfg() -> &'static str { + // 127.0.0.1:1 dials hermetically so tests can prove + // start_device_login() reached the device-authorization POST — + // i.e. config validation passed — without a network round-trip. + r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { + "provider": "linear", + "authorize_url": "https://linear.app/oauth/authorize", + "token_url": "https://api.linear.app/oauth/token", + "device_authorization_endpoint": "http://127.0.0.1:1/device", + "client_id": "linear-client", + "scopes": ["read"] + } + } + } + }"# + } + + async fn start_device_err(mgr: &McpRuntimeManager, name: &str) -> String { + mgr.start_device_login(name).await.unwrap_err().to_string() + } + + #[tokio::test] + async fn start_device_login_rejects_unknown_server() { + let cfg: McpConfig = serde_json::from_str(linear_device_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let err = start_device_err(&mgr, "ghost").await; + assert!(err.contains("ghost"), "got: {err}"); + } + + #[tokio::test] + async fn start_device_login_rejects_stdio_server() { + let json = r#"{ + "mcpServers": { + "fs": { + "type": "stdio", + "command": "/bin/true" + } + } + }"#; + 
let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let err = start_device_err(&mgr, "fs").await; + assert!(err.contains("stdio"), "got: {err}"); + } + + #[tokio::test] + async fn start_device_login_rejects_custom_without_device_endpoint() { + // linear_custom_cfg omits `device_authorization_endpoint` — the + // paste-back fixture from earlier slices doubles as the negative + // case here. + let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let err = start_device_err(&mgr, "linear").await; + assert!(err.contains("device_authorization_endpoint"), "got: {err}"); + } + + #[tokio::test] + async fn start_device_login_with_device_endpoint_reaches_http_post() { + // Config validation passes (Custom + device_endpoint + client_id all + // present) so the failure must come from the POST itself — proves + // the gate didn't short-circuit before dial. + let cfg: McpConfig = serde_json::from_str(linear_device_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let err = start_device_err(&mgr, "linear").await; + assert!( + !err.contains("device_authorization_endpoint"), + "config validation should have passed; got: {err}" + ); + } } From 354947d2dd1d7f65a079536edb3019cd219a4ce6 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 09:43:23 +0000 Subject: [PATCH 84/98] feat(openab-agent/mcp): `mcp login --device` CLI surface (RFC 8628 UX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - main.rs: add `--device` flag to `McpAction::Login` (conflicts_with `paste`) - mcp/mod.rs: `cli_login_device(name)` — prints verification_uri_complete (§3.3.1) when present, falls back to `verification_uri` + `user_code`; watches `statuses()` until Disconnected (success) / NeedsAuth (terminal failure) or the provider-declared `expires_in` deadline elapses - runtime.rs: drop `#[allow(dead_code)]` on the Tick-55 
device-flow chain (`start_device_login` is now called from the CLI surface) --- openab-agent/src/main.rs | 18 +++++++++- openab-agent/src/mcp/mod.rs | 63 +++++++++++++++++++++++++++++++++ openab-agent/src/mcp/runtime.rs | 13 +------ 3 files changed, 81 insertions(+), 13 deletions(-) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index 018d55424..12d63c053 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -66,6 +66,12 @@ enum McpAction { /// form to keep `code` + `state` out of shell history and `ps`. #[arg(long, value_name = "URL")] paste: Option, + /// Use RFC 8628 device-code flow instead of paste-back. Requires + /// the server's `oauth:` block to declare a + /// `device_authorization_endpoint`. Useful for headless / remote + /// hosts where the browser redirect target isn't reachable. + #[arg(long, conflicts_with = "paste")] + device: bool, }, } @@ -120,7 +126,17 @@ async fn main() { McpAction::List { resolve } => mcp::cli_list_servers(resolve), McpAction::Status => mcp::cli_show_status().await, McpAction::Connect { name } => mcp::cli_connect(name).await, - McpAction::Login { name, paste } => mcp::cli_login(name, paste).await, + McpAction::Login { + name, + paste, + device, + } => { + if device { + mcp::cli_login_device(name).await; + } else { + mcp::cli_login(name, paste).await; + } + } }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index b9d6c8be3..bff844b19 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -231,6 +231,69 @@ pub async fn cli_login(name: String, paste: Option) { } } +/// `openab-agent mcp login --device`. Drives the RFC 8628 +/// device-code flow end-to-end: +/// +/// 1. `start_device_login` POSTs the §3.1 device authorization request, +/// prints the verification URL + user code, and spawns the §3.4 +/// polling task in the background +/// 2. 
This CLI polls `statuses()` until the server transitions away from +/// `Connecting` — `Disconnected` means the polling task persisted the +/// `TokenStore` (next `connect()` picks it up); `NeedsAuth` means the +/// flow terminally failed (access_denied / expired_token / network) +/// +/// Wall-clock timeout = `expires_in` returned by the provider. Polling +/// happens in the runtime-spawned task; this loop only watches status, +/// so the user can `Ctrl-C` the CLI without leaking pending state — the +/// detached task dies with the process and `auth.json` stays clean. +pub async fn cli_login_device(name: String) { + let manager = McpRuntimeManager::from_config(load_config_or_exit()); + let start = match manager.start_device_login(&name).await { + Ok(s) => s, + Err(e) => { + eprintln!("✗ {name}: {e:#}"); + std::process::exit(1); + } + }; + println!(); + if let Some(complete) = &start.verification_uri_complete { + println!("Open this URL in a browser (pre-filled with user code):"); + println!(); + println!(" {complete}"); + println!(); + } + println!("Or open the verification URL and enter the user code:"); + println!(); + println!(" URL: {}", start.verification_uri); + println!(" User code: {}", start.user_code); + println!(); + println!( + "Waiting for authorization (timeout: {}s)...", + start.expires_in + ); + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(start.expires_in); + loop { + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + if std::time::Instant::now() >= deadline { + eprintln!("✗ device-flow timed out (no user authorization)"); + std::process::exit(1); + } + let statuses = manager.statuses().await; + let status = statuses.iter().find(|(n, _)| n == &name).map(|(_, s)| s); + match status { + Some(runtime::ServerStatus::Disconnected) => { + println!("● logged in: {name}"); + return; + } + Some(runtime::ServerStatus::NeedsAuth) => { + eprintln!("✗ device-flow failed (run `mcp status` / check logs)"); + 
std::process::exit(1); + } + _ => continue, + } + } +} + fn read_redirect_from_stdin() -> std::io::Result { use std::io::Write; print!("Paste the FULL redirect URL: "); diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 13056946f..0d87f8906 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -91,7 +91,6 @@ pub struct PasteLoginStart { /// prefer it when present and fall back to the /// `verification_uri` + `user_code` pair. #[derive(Debug, Clone)] -#[allow(dead_code)] pub struct DeviceLoginStart { pub user_code: String, pub verification_uri: String, @@ -319,7 +318,6 @@ impl McpRuntimeManager { /// keeps the polling task out of the MCP handshake path. The next /// `connect()` reads the cached token via the oauth-aware `DialPlan` /// branch and reaches `Connected` through the normal lifecycle. - #[allow(dead_code)] pub async fn start_device_login(&self, name: &str) -> Result { let (device_endpoint, client_id, token_url, scopes, provider_name) = self.resolve_device_client(name).await?; @@ -364,7 +362,6 @@ impl McpRuntimeManager { /// for `name`. Rejects non-Http / non-oauth / built-in / missing-endpoint /// configurations with explicit errors so the user sees what to fix in /// `mcp.json`. - #[allow(dead_code)] async fn resolve_device_client( &self, name: &str, @@ -409,7 +406,7 @@ impl McpRuntimeManager { /// only observable side-effect is `auth.json` (on Success) + the /// `ServerHandle.status` transition. Errors are logged via `tracing` /// and surface to the user via `mcp status` (Failed/NeedsAuth). - #[allow(dead_code, clippy::too_many_arguments)] + #[allow(clippy::too_many_arguments)] async fn run_device_poll_loop( &self, name: &str, @@ -467,7 +464,6 @@ impl McpRuntimeManager { /// Pure-persistence tail of `run_device_poll_loop` on RFC 8628 §3.5 /// Success. Mirrors `finish_login`'s `u64::MAX` sentinel for absent /// `expires_in` (Mira Tick 46 catch). 
- #[allow(dead_code)] async fn finalize_device_login( &self, name: &str, @@ -496,7 +492,6 @@ impl McpRuntimeManager { } } - #[allow(dead_code)] async fn mark_device_login_failed(&self, name: &str, err: anyhow::Error) { tracing::warn!(server = %name, error = %err, "device-flow polling failed"); let mut handles = self.handles.write().await; @@ -786,7 +781,6 @@ async fn post_token_refresh( /// present fallback the agent relays to the user). `interval` defaults to /// 5s per RFC 8628 §3.5 when omitted by the provider. #[derive(Debug, serde::Deserialize)] -#[allow(dead_code)] struct DeviceAuthResponse { device_code: String, user_code: String, @@ -798,7 +792,6 @@ struct DeviceAuthResponse { interval: u64, } -#[allow(dead_code)] fn default_device_poll_interval() -> u64 { 5 } @@ -808,7 +801,6 @@ fn default_device_poll_interval() -> u64 { /// are flow-level states NOT real failures — they drive the polling loop. /// Everything else folds into a fatal `Err` at the call site. #[derive(Debug)] -#[allow(dead_code)] enum DevicePollOutcome { Success(TokenExchangeResponse), AuthorizationPending, @@ -822,7 +814,6 @@ enum DevicePollOutcome { /// parses as a token response; 4xx parses `{"error": "..."}` and maps the /// four flow-state codes to enum variants; everything else (including /// non-JSON / unknown error codes) folds into `Err`. 
-#[allow(dead_code)] fn classify_device_poll(status: reqwest::StatusCode, body: &str) -> Result { if status.is_success() { return serde_json::from_str(body) @@ -847,7 +838,6 @@ fn classify_device_poll(status: reqwest::StatusCode, body: &str) -> Result Date: Mon, 1 Jun 2026 09:53:04 +0000 Subject: [PATCH 85/98] feat(openab-agent/mcp): custom-provider paste-back support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - config.rs: add `oauth.redirect_uri: Option` to `OAuthConfig` for the paste-back branch on custom providers (built-ins pin their callback in `ProviderSpec`; ignored by the device-code branch) - oauth.rs: propagate `redirect_uri` through `ResolvedProvider::Custom` - runtime.rs: in `resolve_paste_client`, accept Custom + client_id + redirect_uri; emit precise per-missing-field error otherwise: - device endpoint present → "use device flow" (preserved) - missing client_id → "requires `oauth.client_id`" - missing redirect_uri → "requires `oauth.redirect_uri` (must match the redirect URL pre-registered with the provider)" - Tests: rename `_rejects_custom_provider_for_now` → `_rejects_custom_without_redirect_uri`; add `_rejects_custom_without_client_id` + success-path `_custom_with_client_id_and_redirect_uri_succeeds` Closes the last Phase 2 paste-back functional gap. ADR §6.3 enumerates optional custom fields but doesn't currently list `redirect_uri`; follow-up ADR amendment lands separately. 
--- openab-agent/src/mcp/config.rs | 6 +++ openab-agent/src/mcp/oauth.rs | 3 ++ openab-agent/src/mcp/runtime.rs | 74 +++++++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 4 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index db18a9779..b4be91486 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -78,6 +78,12 @@ pub struct OAuthConfig { pub client_id: Option, #[serde(default)] pub device_authorization_endpoint: Option, + /// Required for the paste-back branch of §6.4 on custom providers. + /// Must match what's pre-registered with the provider's OAuth app + /// (built-ins pin their callback in `ProviderSpec`). Ignored by the + /// device-code branch. + #[serde(default)] + pub redirect_uri: Option, #[serde(default)] pub discovery: bool, #[serde(default)] diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index f3ea31661..60052f98a 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -101,6 +101,7 @@ pub enum ResolvedProvider { token_url: String, client_id: Option, device_authorization_endpoint: Option, + redirect_uri: Option, scopes: Vec, }, } @@ -176,6 +177,7 @@ fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result token_url, client_id: cfg.client_id.clone(), device_authorization_endpoint: cfg.device_authorization_endpoint.clone(), + redirect_uri: cfg.redirect_uri.clone(), scopes: cfg.scopes.clone(), }) } @@ -289,6 +291,7 @@ mod tests { client_id, device_authorization_endpoint, scopes, + .. } = resolve(&cfg).unwrap() else { panic!("expected Custom variant"); diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 0d87f8906..79318680b 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -537,9 +537,24 @@ impl McpRuntimeManager { "mcp server {name:?} has a device endpoint; use device flow" )); } - ResolvedProvider::Custom { .. 
} => { + ResolvedProvider::Custom { + client_id: Some(client_id), + redirect_uri: Some(redirect_uri), + .. + } => (client_id.clone(), redirect_uri.clone()), + ResolvedProvider::Custom { + client_id: None, .. + } => { return Err(anyhow!( - "mcp server {name:?}: custom-provider paste-back not yet supported" + "mcp server {name:?} custom paste-back requires `oauth.client_id` in mcp.json" + )); + } + ResolvedProvider::Custom { + redirect_uri: None, .. + } => { + return Err(anyhow!( + "mcp server {name:?} custom paste-back requires `oauth.redirect_uri` in mcp.json \ + (must match the redirect URL pre-registered with the provider)" )); } }; @@ -1175,14 +1190,65 @@ mod tests { } #[tokio::test] - async fn start_paste_login_rejects_custom_provider_for_now() { + async fn start_paste_login_rejects_custom_without_redirect_uri() { let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); let (mgr, _dir) = mgr_with_tempdir(cfg); let err = start_login_err(&mgr, "linear").await; - assert!(err.contains("custom-provider"), "got: {err}"); + assert!(err.contains("oauth.redirect_uri"), "got: {err}"); + assert!(mgr.pending_paste_login("linear").await.is_none()); + } + + #[tokio::test] + async fn start_paste_login_rejects_custom_without_client_id() { + let json = r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { + "provider": "linear", + "authorize_url": "https://linear.app/oauth/authorize", + "token_url": "https://api.linear.app/oauth/token", + "redirect_uri": "https://example.com/cb" + } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let err = start_login_err(&mgr, "linear").await; + assert!(err.contains("oauth.client_id"), "got: {err}"); assert!(mgr.pending_paste_login("linear").await.is_none()); } + #[tokio::test] + async fn start_paste_login_custom_with_client_id_and_redirect_uri_succeeds() { + let json = r#"{ + "mcpServers": { + "linear": 
{ + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { + "provider": "linear", + "authorize_url": "https://linear.app/oauth/authorize", + "token_url": "https://api.linear.app/oauth/token", + "client_id": "linear-client", + "redirect_uri": "https://example.com/cb", + "scopes": ["read"] + } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + let start = mgr.start_paste_login("linear").await.unwrap(); + assert!(start.authorize_url.contains("client_id=linear-client")); + assert!(start.authorize_url.contains("redirect_uri=https")); + let pending = mgr.pending_paste_login("linear").await.unwrap(); + assert_eq!(pending.state, start.state); + assert_eq!(pending.provider_name, "linear"); + } + #[tokio::test] async fn start_paste_login_rejects_custom_with_device_endpoint() { let json = r#"{ From f6a027a17e89e8e7d5e9b5e5c77d66f8ce554962 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 10:00:36 +0000 Subject: [PATCH 86/98] =?UTF-8?q?docs(adr/mcp):=20=C2=A76.3=20add=20`oauth?= =?UTF-8?q?.redirect=5Furi=3F`=20for=20custom=20paste-back?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the field to the §6.3 config-shape list and a follow-up paragraph explaining why custom paste-back requires it (no local listener; must match provider OAuth app registration) and why the device-code / built-in branches ignore it. Follow-up to 84837ee (custom-provider paste-back support); Mira + Kirin dual sign-off requested this as a non-blocking docs amendment. 
--- docs/adr/openab-agent-mcp.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md index fc4c895b3..1661cbbda 100644 --- a/docs/adr/openab-agent-mcp.md +++ b/docs/adr/openab-agent-mcp.md @@ -527,7 +527,9 @@ Callback values apply when the browser flow is engaged (`--browser` / `$DISPLAY` ### 6.3 Custom provider extension point -Config can declare `oauth: { authorize_url, token_url, client_id, scopes, device_authorization_endpoint?, discovery?, discovery_allowlist? }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. If `device_authorization_endpoint` is set, §6.4 device-code flow is preferred over paste-back. RFC 8414 dynamic discovery is opt-in only and requires an allowlist — see §6.4. +Config can declare `oauth: { authorize_url, token_url, client_id, scopes, device_authorization_endpoint?, redirect_uri?, discovery?, discovery_allowlist? }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. If `device_authorization_endpoint` is set, §6.4 device-code flow is preferred over paste-back. RFC 8414 dynamic discovery is opt-in only and requires an allowlist — see §6.4. + +`oauth.redirect_uri` is required by the paste-back branch of §6.4 for custom providers — it must match the URL pre-registered with the provider's OAuth app, since custom paste-back doesn't bind a local listener (built-ins pin their callback in `ProviderSpec`; the device-code branch ignores it). 
### 6.4 Agent-guided OAuth flow (default) From e5ea59f0501dfbe0e15d34a09cdc155c9369421c Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 10:13:23 +0000 Subject: [PATCH 87/98] refactor(openab-agent): inline TokenStore::is_expired method - auth.rs: convert free function `is_expired(&TokenStore) -> bool` into `impl TokenStore { pub fn is_expired(&self) -> bool }`. Switch the skew addition to `saturating_add` for parity with the rest of the OAuth arithmetic (defends `u64::MAX` "never expires" sentinel + any near-overflow clock value). - auth.rs: fold `show_status`'s inline duplicate of the expiry formula into the new method. - mcp/runtime.rs: drop the free-function import + update the cached-token DialPlan branch to `store.is_expired()`. - Tests: rewrite the three existing `is_expired` cases to method form and add `_sentinel_u64_max` covering the saturating_add behavior. Mira Tick 50 non-blocking cleanup. --- openab-agent/src/auth.rs | 42 ++++++++++++++++++++------------- openab-agent/src/mcp/runtime.rs | 8 +++---- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index e5accc68d..54d63f1c2 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -39,6 +39,21 @@ pub struct TokenStore { pub provider: String, } +impl TokenStore { + /// True when the cached access token has expired (with `REFRESH_SKEW_SECONDS` + /// safety margin so callers refresh proactively). `u64::MAX` is the + /// "never expires" sentinel used by providers that omit `expires_in` + /// — `saturating_add` keeps the skew arithmetic safe against the sentinel + /// and against any other near-`u64::MAX` clock value. 
+ pub fn is_expired(&self) -> bool { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + now.saturating_add(REFRESH_SKEW_SECONDS) >= self.expires_at + } +} + /// Transient per-server state captured at `start_paste_login` and consumed /// by `complete_login` (ADR §6.4). Lives in `auth.json` under /// `mcp-pending:`. `token_url` + `provider_name` are snapshotted @@ -295,17 +310,9 @@ pub fn remove_namespaced_token(key: &str) -> Result<()> { write_auth_file(&path, &map) } -pub(crate) fn is_expired(store: &TokenStore) -> bool { - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - now + REFRESH_SKEW_SECONDS >= store.expires_at -} - pub async fn get_valid_token() -> Result { let mut store = load_tokens()?; - if is_expired(&store) { + if store.is_expired() { store = refresh_token(&store).await?; save_tokens(&store)?; } @@ -658,11 +665,7 @@ pub async fn login_codex_device_flow() -> Result<()> { pub fn show_status() { match load_tokens() { Ok(store) => { - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - let expired = now + REFRESH_SKEW_SECONDS >= store.expires_at; + let expired = store.is_expired(); let masked = if store.access_token.len() > 12 { format!( "{}...{}", @@ -707,12 +710,12 @@ mod tests { .duration_since(UNIX_EPOCH) .unwrap() .as_secs(); - assert!(!is_expired(&make_store(now + 3600))); + assert!(!make_store(now + 3600).is_expired()); } #[test] fn test_is_expired_past_token() { - assert!(is_expired(&make_store(0))); + assert!(make_store(0).is_expired()); } #[test] @@ -721,7 +724,12 @@ mod tests { .duration_since(UNIX_EPOCH) .unwrap() .as_secs(); - assert!(is_expired(&make_store(now + 60))); + assert!(make_store(now + 60).is_expired()); + } + + #[test] + fn test_is_expired_sentinel_u64_max() { + assert!(!make_store(u64::MAX).is_expired()); } #[test] diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs 
index 79318680b..53ea5a83e 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -28,9 +28,9 @@ use super::config::{McpConfig, ServerConfig}; use super::flow::{init_paste_authorize, parse_paste_callback}; use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; use crate::auth::{ - auth_path, is_expired, list_pending_logins_at, load_namespaced_token_at, load_pending_login, - pending_key, remove_pending_login, save_namespaced_token_at, save_pending_login, - PendingPasteLogin, TokenStore, + auth_path, list_pending_logins_at, load_namespaced_token_at, load_pending_login, pending_key, + remove_pending_login, save_namespaced_token_at, save_pending_login, PendingPasteLogin, + TokenStore, }; #[derive(Debug, Clone, PartialEq, Eq)] @@ -618,7 +618,7 @@ impl McpRuntimeManager { oauth: Some(_), .. } => match load_namespaced_token_at(&self.auth_path, name) { - Ok(store) if !is_expired(&store) => DialPlan::Dial(Dial::Http { + Ok(store) if !store.is_expired() => DialPlan::Dial(Dial::Http { url, auth: Some(store.access_token), }), From 0dee1d7c6dfd07878c491c016628d201ec05bd27 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 11:21:31 +0000 Subject: [PATCH 88/98] chore(openab-agent/mcp): simplify-pass cleanup Drop stale `#[allow(dead_code)]` markers now that runtime/flow wire all items through prod paths. Reuse `reqwest::Client` across the device-poll loop so TLS / TCP handshakes are amortized across the dozens-to-hundreds of polls a 30-minute device-flow window can produce. Trim session-narrative parentheticals from doc comments. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 6 ++--- openab-agent/src/mcp/flow.rs | 1 - openab-agent/src/mcp/oauth.rs | 6 ----- openab-agent/src/mcp/runtime.rs | 39 +++++++++++++++++++++------------ 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 54d63f1c2..c51922427 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -181,9 +181,9 @@ pub fn load_namespaced_token(key: &str) -> Result { load_namespaced_token_at(&auth_path(), key) } -/// Path-injected sibling of `load_namespaced_token` (Tick 42 lesson). +/// Path-injected sibling of `load_namespaced_token` so tests + the runtime +/// manager can target a tempdir without `$HOME` overrides. #[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp login regression test) pub fn load_namespaced_token_at(path: &Path, key: &str) -> Result { let map = read_auth_file(path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; @@ -204,7 +204,7 @@ pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { } /// Path-injected sibling of `save_namespaced_token` so tests + the runtime -/// manager can target a tempdir without `$HOME` overrides (Tick 42 lesson). +/// manager can target a tempdir without `$HOME` overrides. #[cfg(feature = "mcp")] pub fn save_namespaced_token_at(path: &Path, key: &str, store: &TokenStore) -> Result<()> { let mut map = read_auth_file(path).unwrap_or_default(); diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index 7d0fd7c80..c751962aa 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -62,7 +62,6 @@ pub fn init_paste_authorize( /// mismatched `state` indicates CSRF / cross-flow contamination and MUST /// reject the exchange before any token-endpoint round-trip. Tolerates /// extra query params (vendor-specific tracking, `iss`, etc.). 
-#[allow(dead_code)] // wired in next slice (runtime::complete_login) pub fn parse_paste_callback(redirect_url: &str, expected_state: &str) -> Result { let url = Url::parse(redirect_url).map_err(|e| anyhow!("invalid redirect URL: {e}"))?; let mut code = None; diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index 60052f98a..43af5caa6 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -3,12 +3,6 @@ //! (§6.4) lands in subsequent slices; this module is the data layer the //! login / refresh code will dispatch through. -// The §6.4 login slice is the first prod caller — until then, every item -// here is reachable only via the unit tests below, so `cargo clippy -// --features mcp -- -D warnings` would flag them as dead. Module-scope -// allow rather than per-item once that slice lands. -#![allow(dead_code)] - use anyhow::{anyhow, Result}; use super::config::OAuthConfig; diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 53ea5a83e..5b5e73a53 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -272,8 +272,8 @@ impl McpRuntimeManager { ) -> Result<()> { // `expires_in: None` means the provider didn't advertise a // lifetime (Figma, Sentry, xAI as of writing). Falling back to - // `now + 0` (Mira's Tick 46 catch) would set the token "already - // expired", triggering an immediate refresh on the next + // `now + 0` would set the token "already expired", + // triggering an immediate refresh on the next // connect() — which fails closed if refresh_token is also None, // bouncing the user back to NeedsAuth seconds after a successful // login. 
Treat absent `expires_in` as a long-lived token via the @@ -417,6 +417,18 @@ impl McpRuntimeManager { initial_interval: u64, expires_in_secs: u64, ) { + // One client across the whole loop — `reqwest::Client` is an + // `Arc`-backed connection pool, so reusing it keeps TLS / TCP + // handshakes amortized across the dozens-to-hundreds of polls a + // 30-minute device-flow window can produce. + let client = match reqwest::Client::builder().build() { + Ok(c) => c, + Err(e) => { + self.mark_device_login_failed(name, anyhow!("build reqwest client: {e}")) + .await; + return; + } + }; let deadline = now_secs().saturating_add(expires_in_secs); let mut interval = initial_interval; loop { @@ -429,13 +441,14 @@ impl McpRuntimeManager { .await; return; } - let outcome = match post_device_token_poll(token_url, client_id, device_code).await { - Ok(o) => o, - Err(e) => { - self.mark_device_login_failed(name, e).await; - return; - } - }; + let outcome = + match post_device_token_poll(&client, token_url, client_id, device_code).await { + Ok(o) => o, + Err(e) => { + self.mark_device_login_failed(name, e).await; + return; + } + }; match outcome { DevicePollOutcome::Success(resp) => { self.finalize_device_login(name, provider_name, token_url, resp) @@ -463,7 +476,7 @@ impl McpRuntimeManager { /// Pure-persistence tail of `run_device_poll_loop` on RFC 8628 §3.5 /// Success. Mirrors `finish_login`'s `u64::MAX` sentinel for absent - /// `expires_in` (Mira Tick 46 catch). + /// `expires_in`. async fn finalize_device_login( &self, name: &str, @@ -883,13 +896,11 @@ async fn post_device_authorization( /// off). Returns a `DevicePollOutcome` so the loop can distinguish the /// four RFC 8628 §3.5 flow states from real errors. 
async fn post_device_token_poll( + client: &reqwest::Client, token_url: &str, client_id: &str, device_code: &str, ) -> Result { - let client = reqwest::Client::builder() - .build() - .context("build reqwest client")?; let resp = client .post(token_url) .form(&[ @@ -1526,7 +1537,7 @@ mod tests { assert!(token.refresh_token.is_empty()); // Long-lived sentinel: no `expires_in` from the provider must NOT // cause an immediate-expiry / refresh-loop / NeedsAuth bounce on - // first use (Mira Tick 46 catch). + // first use. assert_eq!(token.expires_at, u64::MAX); } From b8167631fee7643dc4cc43223a486ab748bf8115 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 11:31:25 +0000 Subject: [PATCH 89/98] refactor(openab-agent/mcp): simplify-pass second cleanup Reviewer-pass findings: - Drop dead auth.rs wrappers (`load/save/remove_namespaced_token`) that were never wired; runtime/tests use the `_at` siblings directly. - Extract `build_token_store` helper to dedupe the `TokenStore` construction + `u64::MAX` sentinel + refresh-rotation logic across `finish_login`, `finalize_device_login`, and `try_refresh_oauth_token`. - Preserve underlying error context in `complete_login` via `.with_context()` instead of `.map_err(|_| anyhow!(...))`. - Trim session-narrative parentheticals ("Tick N", "Mira's review") from doc comments; keep the technical reason that future readers need. - Refresh stale doc strings that still said "next slice" for behaviour Phase 2 itself ships. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 58 +++------------- openab-agent/src/mcp/meta_tool.rs | 8 +-- openab-agent/src/mcp/oauth.rs | 4 +- openab-agent/src/mcp/runtime.rs | 109 ++++++++++++++---------------- 4 files changed, 69 insertions(+), 110 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index c51922427..e0d750a9b 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -74,11 +74,8 @@ pub struct PendingPasteLogin { /// `access_token`, `PendingPasteLogin` has required `verifier` — the /// shapes are disjoint, so deserialization picks the right variant /// without an explicit tag (and existing files stay byte-compatible). -/// -/// Per Mira's Tick 39 review: option-A (repurposing TokenStore fields for -/// pending state) would have made the refresh task treat pending entries -/// as "expired tokens" and loop on them. The untagged enum keeps the two -/// state machines completely separate. +/// Keeping the two as distinct variants stops the refresh task from +/// treating pending entries as "expired tokens" and looping on them. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(untagged)] pub enum AuthEntry { @@ -172,17 +169,11 @@ fn save_tokens(store: &TokenStore) -> Result<()> { write_auth_file(&path, &map) } -/// Look up the credential at `key` (e.g. `mcp:linear`). Returns the codex -/// entry for `key = "codex"`, but prefer `load_tokens()` for that path — -/// this helper exists for MCP server-namespaced lookups (ADR §6.1). -#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) -pub fn load_namespaced_token(key: &str) -> Result { - load_namespaced_token_at(&auth_path(), key) -} - -/// Path-injected sibling of `load_namespaced_token` so tests + the runtime -/// manager can target a tempdir without `$HOME` overrides. +/// Look up the credential at `key` (e.g. `mcp:linear`). 
`path` is injected +/// so the runtime manager + tests can target a tempdir without `$HOME` +/// overrides. Returns the codex entry for `key = "codex"`, but prefer +/// `load_tokens()` for that path — this helper exists for MCP +/// server-namespaced lookups (ADR §6.1). #[cfg(feature = "mcp")] pub fn load_namespaced_token_at(path: &Path, key: &str) -> Result { let map = @@ -195,16 +186,10 @@ pub fn load_namespaced_token_at(path: &Path, key: &str) -> Result { } /// Insert or replace the credential at `key`, preserving all other entries. -/// Read-modify-write on a single file: callers in the same process must -/// serialize themselves (the lifecycle manager already does per ADR §5.7). -#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) -pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { - save_namespaced_token_at(&auth_path(), key, store) -} - -/// Path-injected sibling of `save_namespaced_token` so tests + the runtime -/// manager can target a tempdir without `$HOME` overrides. +/// `path` is injected so the runtime manager + tests can target a tempdir +/// without `$HOME` overrides. Read-modify-write on a single file: callers +/// in the same process must serialize themselves (the lifecycle manager +/// already does per ADR §5.7). #[cfg(feature = "mcp")] pub fn save_namespaced_token_at(path: &Path, key: &str, store: &TokenStore) -> Result<()> { let mut map = read_auth_file(path).unwrap_or_default(); @@ -289,27 +274,6 @@ pub fn remove_pending_login(path: &Path, key: &str) -> Result<()> { write_auth_file(path, &map) } -/// Remove the credential at `key`. Idempotent — missing key is not an -/// error. If the map becomes empty, the file is deleted so `mcp doctor` -/// can report "no credentials" instead of "empty file". 
-#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp logout / revoked-refresh recovery) -pub fn remove_namespaced_token(key: &str) -> Result<()> { - let path = auth_path(); - let mut map = match read_auth_file(&path) { - Ok(m) => m, - Err(_) => return Ok(()), - }; - if map.remove(key).is_none() { - return Ok(()); - } - if map.is_empty() { - let _ = std::fs::remove_file(&path); - return Ok(()); - } - write_auth_file(&path, &map) -} - pub async fn get_valid_token() -> Result { let mut store = load_tokens()?; if store.is_expired() { diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 6a8d6060f..e9bd69806 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -74,10 +74,10 @@ async fn call_tool( tool: &str, arguments: Value, ) -> Result { - // Lenient arg coercion per Mira's Tick 18 review: LLMs often send - // `null` or omit `arguments` for no-arg tools; rejecting those would - // make zero-arg calls fragile. Only real type errors (string, number, - // array, bool) are refused. + // Lenient arg coercion: LLMs often send `null` or omit `arguments` + // for no-arg tools; rejecting those would make zero-arg calls + // fragile. Only real type errors (string, number, array, bool) + // are refused. let args_map = match arguments { Value::Object(map) => map, Value::Null => serde_json::Map::new(), diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index 43af5caa6..38024d822 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -180,8 +180,8 @@ fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result mod tests { use super::*; - // Both env-touching tests below race the same OS env var; serialize - // them per the runbook's Tick 24 lesson (acp.rs ANTHROPIC_API_KEY race). + // Both env-touching tests below race the same OS env var — `set_var` + // is unsound under concurrent reads, so serialize them. 
static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); #[test] diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 5b5e73a53..319e51e8f 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -105,7 +105,7 @@ pub struct McpRuntimeManager { handles: Arc>>, /// `auth.json` location used for `mcp-pending:` persistence. /// Injectable so tests can point at a tempdir instead of `$HOME`, - /// avoiding cross-module HOME-env races (Tick 24 lesson + ADR §6.4). + /// avoiding cross-module HOME-env races (ADR §6.4). auth_path: PathBuf, } @@ -229,10 +229,10 @@ impl McpRuntimeManager { } /// Read the on-disk pending paste-login for `name`. `None` if there's - /// no entry or the file is unreadable. Used by `complete_login` to - /// drive flow continuation and by `mcp status` to surface a partially - /// completed login (next slice will add the status surfacing). - #[allow(dead_code)] // wired in next slice (mcp status surfacing) + /// no entry or the file is unreadable. `mcp status` surfaces in-flight + /// logins via `list_pending_logins_at`; this accessor is the single- + /// entry counterpart for callers that need the full snapshot. + #[allow(dead_code)] // accessor for future per-entry status detail pub async fn pending_paste_login(&self, name: &str) -> Option { load_pending_login(&self.auth_path, &pending_key(name)).ok() } @@ -245,8 +245,10 @@ impl McpRuntimeManager { /// transitions `NeedsAuth → Disconnected` so the next `connect()` /// dials the now-authenticated transport. 
pub async fn complete_login(&self, name: &str, redirect_url: &str) -> Result<()> { - let pending = load_pending_login(&self.auth_path, &pending_key(name)) - .map_err(|_| anyhow!("no pending login for {name:?}; run `mcp login {name}` first"))?; + let pending = + load_pending_login(&self.auth_path, &pending_key(name)).with_context(|| { + format!("no pending login for {name:?}; run `mcp login {name}` first") + })?; let code = parse_paste_callback(redirect_url, &pending.state)?; let (_provider, client_id, redirect_uri) = self.resolve_paste_client(name).await?; let resp = post_token_exchange( @@ -270,27 +272,12 @@ impl McpRuntimeManager { pending: &PendingPasteLogin, resp: TokenExchangeResponse, ) -> Result<()> { - // `expires_in: None` means the provider didn't advertise a - // lifetime (Figma, Sentry, xAI as of writing). Falling back to - // `now + 0` would set the token "already expired", - // triggering an immediate refresh on the next - // connect() — which fails closed if refresh_token is also None, - // bouncing the user back to NeedsAuth seconds after a successful - // login. Treat absent `expires_in` as a long-lived token via the - // u64::MAX sentinel: `is_expired` will return false until the - // provider eventually 401s on use (at which point the user runs - // `mcp login` again, the correct UX for non-refreshable tokens). 
- let expires_at = match resp.expires_in { - Some(secs) => now_secs().saturating_add(secs), - None => u64::MAX, - }; - let store = TokenStore { - access_token: resp.access_token, - refresh_token: resp.refresh_token.unwrap_or_default(), - expires_at, - token_endpoint: pending.token_url.clone(), - provider: pending.provider_name.clone(), - }; + let store = build_token_store( + resp, + pending.token_url.clone(), + pending.provider_name.clone(), + None, + ); save_namespaced_token_at(&self.auth_path, name, &store)?; remove_pending_login(&self.auth_path, &pending_key(name))?; let mut handles = self.handles.write().await; @@ -475,8 +462,7 @@ impl McpRuntimeManager { } /// Pure-persistence tail of `run_device_poll_loop` on RFC 8628 §3.5 - /// Success. Mirrors `finish_login`'s `u64::MAX` sentinel for absent - /// `expires_in`. + /// Success. async fn finalize_device_login( &self, name: &str, @@ -484,17 +470,7 @@ impl McpRuntimeManager { token_url: &str, resp: TokenExchangeResponse, ) { - let expires_at = match resp.expires_in { - Some(secs) => now_secs().saturating_add(secs), - None => u64::MAX, - }; - let store = TokenStore { - access_token: resp.access_token, - refresh_token: resp.refresh_token.unwrap_or_default(), - expires_at, - token_endpoint: token_url.to_string(), - provider: provider_name.to_string(), - }; + let store = build_token_store(resp, token_url.to_string(), provider_name.to_string(), None); if let Err(e) = save_namespaced_token_at(&self.auth_path, name, &store) { self.mark_device_login_failed(name, e).await; return; @@ -589,20 +565,12 @@ impl McpRuntimeManager { let (_provider, client_id, _redirect_uri) = self.resolve_paste_client(name).await?; let resp = post_token_refresh(&store.token_endpoint, &client_id, &store.refresh_token).await?; - let new_refresh = resp - .refresh_token - .unwrap_or_else(|| store.refresh_token.clone()); - let expires_at = match resp.expires_in { - Some(secs) => now_secs().saturating_add(secs), - None => u64::MAX, - }; - let 
new_store = TokenStore { - access_token: resp.access_token, - refresh_token: new_refresh, - expires_at, - token_endpoint: store.token_endpoint.clone(), - provider: store.provider.clone(), - }; + let new_store = build_token_store( + resp, + store.token_endpoint.clone(), + store.provider.clone(), + Some(store.refresh_token.clone()), + ); save_namespaced_token_at(&self.auth_path, name, &new_store)?; Ok(new_store) } @@ -740,6 +708,33 @@ struct TokenExchangeResponse { expires_in: Option, } +/// Lift a token-endpoint response into the on-disk `TokenStore` shape. +/// `expires_in: None` → `u64::MAX` sentinel (treated as never-expires by +/// `TokenStore::is_expired`); a `now + 0` would mark the token already +/// expired and bounce the user back through login on the next connect(). +/// `fallback_refresh` preserves the previous refresh token on rotation +/// when the provider omits one (ADR §6.6 Google-style); fresh logins +/// pass `None` so an omitted refresh token records as empty. +fn build_token_store( + resp: TokenExchangeResponse, + token_endpoint: String, + provider: String, + fallback_refresh: Option, +) -> TokenStore { + let expires_at = match resp.expires_in { + Some(secs) => now_secs().saturating_add(secs), + None => u64::MAX, + }; + let refresh_token = resp.refresh_token.or(fallback_refresh).unwrap_or_default(); + TokenStore { + access_token: resp.access_token, + refresh_token, + expires_at, + token_endpoint, + provider, + } +} + /// Shared POST helper for both `post_token_exchange` (RFC 6749 §4.1.3) /// and `post_token_refresh` (RFC 6749 §6). Public client — no /// `client_secret`. Errors fold body text into the message so transient @@ -1125,8 +1120,8 @@ mod tests { } } - // start_paste_login + builtin_client_id race on the same env var. - // Same fix as oauth.rs / acp.rs (Tick 24 lesson). + // start_paste_login + builtin_client_id race on the same OS env var — + // `set_var` is unsound under concurrent reads, so serialize them. 
static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); fn linear_custom_cfg() -> &'static str { From c053b06de08083034e33a525a13b8843aa4bbf0a Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 12:08:52 +0000 Subject: [PATCH 90/98] docs(openab-agent/mcp): document refresh-race tradeoff per Kirin review Concurrent connect() callers may each hit `try_refresh_oauth_token` with the same stale `refresh_token` (no per-server single-flight gate yet). The connect() race guard prevents state corruption; the worst case is N duplicate POSTs to the token endpoint. A Phase 3 follow-up will add per-server single-flight for providers that cascade-revoke on replayed refresh tokens. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 319e51e8f..fe457ffb1 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -558,6 +558,18 @@ impl McpRuntimeManager { /// (Google-style rotation); the agent fsyncs `auth.json` before /// returning so deployment-side mtime watchers can sync the rotated /// token to peer replicas. + /// + /// Concurrent `connect()` callers on the same server may each enter + /// this path with the same stale `refresh_token` — there is no + /// per-server single-flight gate here. That is intentional for now: + /// the connect() race guard (see the `NeedsRefresh` branch below) + /// keeps a failed concurrent refresh from clobbering a peer's + /// successful `Connected` install, so the worst-case outcome is N + /// duplicate POSTs to the token endpoint rather than state + /// corruption. Providers that cascade-revoke on replayed refresh + /// tokens would still bounce the user back through `mcp login`; + /// a Phase 3 follow-up will add a `tokio::sync::Mutex` per server + /// (or `OnceCell`) to single-flight refreshes. 
async fn try_refresh_oauth_token(&self, name: &str, store: &TokenStore) -> Result { if store.refresh_token.is_empty() { return Err(anyhow!("no refresh_token cached for {name:?}")); From 79dfa810c95c6b30a84acd8aee49ec09517cd259 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 12:17:12 +0000 Subject: [PATCH 91/98] =?UTF-8?q?docs(adr/mcp):=20=C2=A76.4=20device-flow?= =?UTF-8?q?=20success=20transitions=20to=20Disconnected?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align ADR with implementation: the device-code polling loop persists tokens then sets status to Disconnected, so the next connect() reads the cached token via the oauth-aware dial path. Keeps the rmcp handshake out of the detached polling task. Co-Authored-By: Claude Opus 4.7 --- docs/adr/openab-agent-mcp.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md index 1661cbbda..1b1e4be53 100644 --- a/docs/adr/openab-agent-mcp.md +++ b/docs/adr/openab-agent-mcp.md @@ -545,8 +545,8 @@ RFC 8414 dynamic discovery (`/.well-known/oauth-authorization-server`) is **disa **Device-code flow** (typically platform OAuth: Anthropic, OpenAI, xAI): - `login` returns `{ flow: "device", user_code, verification_url, expires_in }`. Agent relays to chat: "Open `https://example.com/device`, enter code: `ABCD-EFGH`". -- Runtime polls the token endpoint in background (5s interval, RFC 8628 §3.5). On success, persists tokens under `mcp:X`, transitions server to `Connected`. -- LLM checks `mcp(action: "status", server: X)` to learn when ready; `complete_login` not required for this branch. +- Runtime polls the token endpoint in background (5s interval, RFC 8628 §3.5). On success, persists tokens under `mcp:X`, transitions server to `Disconnected` so the next `connect()` reads the cached token via the oauth-aware dial path and reaches `Connected` through the normal lifecycle. 
Keeping the rmcp handshake out of the polling task avoids spawning child processes from a detached `tokio::task`. +- LLM checks `mcp(action: "status", server: X)` to learn when the polling loop completes (status leaves `Connecting`); `complete_login` is not required for this branch — the next `mcp call` triggers `connect()`. **Paste-back flow** (typically MCP SaaS: Linear, Notion, Figma, ...): From 98b912560a8be5503dcf166f2955b21c4efd3310 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 12:21:06 +0000 Subject: [PATCH 92/98] fix(openab-agent/mcp): abort prior device-poll task on login retry Track the most-recent device-poll JoinHandle per server in `McpRuntimeManager.device_login_tasks`. On a fresh `start_device_login` the prior task is aborted before the new one is registered, so a retry after a transient failure can't leave two loops racing to finalize the same server (would risk double-persisting tokens / contradictory status transitions). Phase 2 follow-up #4 of 4. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index fe457ffb1..a064276eb 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -14,7 +14,7 @@ use std::collections::HashMap; use std::path::PathBuf; -use std::sync::Arc; +use std::sync::{Arc, Mutex as StdMutex}; use anyhow::{anyhow, Context, Result}; use rmcp::service::{RoleClient, RunningService}; @@ -23,6 +23,7 @@ use rmcp::transport::{ConfigureCommandExt, StreamableHttpClientTransport, TokioC use rmcp::ServiceExt; use tokio::process::Command; use tokio::sync::RwLock; +use tokio::task::AbortHandle; use super::config::{McpConfig, ServerConfig}; use super::flow::{init_paste_authorize, parse_paste_callback}; @@ -107,6 +108,12 @@ pub struct McpRuntimeManager { /// Injectable so tests can point at a tempdir instead of `$HOME`, /// 
avoiding cross-module HOME-env races (ADR §6.4). auth_path: PathBuf, + /// Abort handle of the most-recent device-poll task per server. A + /// fresh `start_device_login` aborts the prior poller so a retry + /// after a transient failure doesn't leave two loops racing to + /// finalize the same server. `std::sync::Mutex` is fine: the lock + /// is only held for `HashMap` ops, never across `.await`. + device_login_tasks: Arc>>, } impl McpRuntimeManager { @@ -131,6 +138,7 @@ impl McpRuntimeManager { Self { handles: Arc::new(RwLock::new(handles)), auth_path, + device_login_tasks: Arc::new(StdMutex::new(HashMap::new())), } } @@ -324,7 +332,8 @@ impl McpRuntimeManager { let token_url_owned = token_url; let client_id_owned = client_id; let provider_name_owned = provider_name; - tokio::spawn(async move { + let task_name = name.to_string(); + let handle = tokio::spawn(async move { manager .run_device_poll_loop( &name_owned, @@ -337,6 +346,16 @@ impl McpRuntimeManager { ) .await; }); + let prior = { + let mut tasks = self + .device_login_tasks + .lock() + .expect("device_login_tasks mutex poisoned"); + tasks.insert(task_name, handle.abort_handle()) + }; + if let Some(prior) = prior { + prior.abort(); + } Ok(DeviceLoginStart { user_code: auth.user_code, verification_uri: auth.verification_uri, From 85ea7587133071c0c42d4399ae46036ffe17f7fd Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 12:23:50 +0000 Subject: [PATCH 93/98] fix(openab-agent/mcp): single-flight refresh-grant per server Add `refresh_locks: HashMap>` and acquire the per-server lock before the refresh POST. After taking the lock, re-read the on-disk token; if a prior waiter already refreshed, return the cached store without a second POST. Closes the cascade-revoke window on providers (Google, ...) that invalidate every issued refresh_token when the same one is replayed concurrently. Phase 2 follow-up #1 of 4. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 63 +++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index a064276eb..870e04e95 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -114,6 +114,13 @@ pub struct McpRuntimeManager { /// finalize the same server. `std::sync::Mutex` is fine: the lock /// is only held for `HashMap` ops, never across `.await`. device_login_tasks: Arc>>, + /// Per-server single-flight gate for refresh-grant requests. The + /// outer `StdMutex` guards the map (held only for `entry().or_insert` + /// ops, never across `.await`); the inner `tokio::Mutex` is held + /// across the network round-trip + disk write so concurrent waiters + /// observe the winner's rotated token instead of replaying a stale + /// refresh_token (which providers like Google would cascade-revoke). + refresh_locks: Arc>>>>, } impl McpRuntimeManager { @@ -139,6 +146,7 @@ impl McpRuntimeManager { handles: Arc::new(RwLock::new(handles)), auth_path, device_login_tasks: Arc::new(StdMutex::new(HashMap::new())), + refresh_locks: Arc::new(StdMutex::new(HashMap::new())), } } @@ -578,21 +586,31 @@ impl McpRuntimeManager { /// returning so deployment-side mtime watchers can sync the rotated /// token to peer replicas. /// - /// Concurrent `connect()` callers on the same server may each enter - /// this path with the same stale `refresh_token` — there is no - /// per-server single-flight gate here. That is intentional for now: - /// the connect() race guard (see the `NeedsRefresh` branch below) - /// keeps a failed concurrent refresh from clobbering a peer's - /// successful `Connected` install, so the worst-case outcome is N - /// duplicate POSTs to the token endpoint rather than state - /// corruption. 
Providers that cascade-revoke on replayed refresh - /// tokens would still bounce the user back through `mcp login`; - /// a Phase 3 follow-up will add a `tokio::sync::Mutex` per server - /// (or `OnceCell`) to single-flight refreshes. + /// Per-server single-flight: concurrent `connect()` callers serialize + /// on `refresh_locks[name]`. After acquiring the lock, the function + /// re-reads the on-disk token; if a prior waiter already refreshed, + /// the cached store is returned without a second POST. This prevents + /// replayed-refresh cascade-revokes on providers like Google. async fn try_refresh_oauth_token(&self, name: &str, store: &TokenStore) -> Result { if store.refresh_token.is_empty() { return Err(anyhow!("no refresh_token cached for {name:?}")); } + let lock = { + let mut locks = self + .refresh_locks + .lock() + .expect("refresh_locks mutex poisoned"); + locks + .entry(name.to_string()) + .or_insert_with(|| Arc::new(tokio::sync::Mutex::new(()))) + .clone() + }; + let _guard = lock.lock().await; + if let Ok(cached) = load_namespaced_token_at(&self.auth_path, name) { + if !cached.is_expired() { + return Ok(cached); + } + } let (_provider, client_id, _redirect_uri) = self.resolve_paste_client(name).await?; let resp = post_token_refresh(&store.token_endpoint, &client_id, &store.refresh_token).await?; @@ -1705,4 +1723,27 @@ mod tests { "config validation should have passed; got: {err}" ); } + + #[tokio::test] + async fn try_refresh_short_circuits_when_disk_has_fresh_token() { + // Single-flight contract: if another waiter has already refreshed + // (fresh token on disk), `try_refresh_oauth_token` must return the + // cached store without POSTing to the dead `token_endpoint`. The + // input `store` is intentionally stale (zero `expires_at`) and + // points at 127.0.0.1:1 — any POST attempt would surface a connect + // error, so a successful return proves the re-check ran. 
+ let cfg: McpConfig = serde_json::from_str(dead_oauth_cfg()).unwrap(); + let (mgr, _dir) = mgr_with_tempdir(cfg); + seed_token(&mgr, "linear", u64::MAX); + let stale = TokenStore { + access_token: "stale".to_string(), + refresh_token: "rtok".to_string(), + expires_at: 0, + token_endpoint: "http://127.0.0.1:1/token".to_string(), + provider: "linear".to_string(), + }; + let fresh = mgr.try_refresh_oauth_token("linear", &stale).await.unwrap(); + assert_eq!(fresh.access_token, "atok-linear"); + assert!(!fresh.is_expired()); + } } From 97fa796a51eca685304a3f1121f4ecaa68e0a812 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 12:25:35 +0000 Subject: [PATCH 94/98] fix(openab-agent): atomic auth.json via tmp + rename(2) + parent fsync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the truncate+write+sync_all path with: write same-dir tmp (PID + atomic counter for uniqueness, mode 0o600 via create_new), fsync the tmp, rename(2) onto auth.json, fsync the parent directory. On crash, the file is either the prior version or the new version — never half-written. The old implementation truncated `auth.json` before writing, so a mid-write crash left a corrupt file that the next startup would fail to parse. On the ECS deployment that then re-restored a now-revoked refresh_token from S3 (ADR §6.1). Phase 2 follow-up #3 of 4. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 55 ++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index e0d750a9b..12dee323f 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -115,28 +115,51 @@ fn read_auth_file(path: &Path) -> Result> { serde_json::from_value(value).map_err(|e| anyhow!("Invalid auth.json: {e}")) } -/// Atomically replace `auth.json` with the new map. 
`fsync(2)` after write -/// satisfies the ADR §6.1 refresh-token rotation contract — without it, a -/// Spot interruption between local write and S3 sync would restore a -/// revoked refresh token from durable storage on the next task start. +/// Atomically replace `auth.json` with the new map via tmp + `rename(2)` + +/// parent-dir fsync. A crash between the tmp write and the rename leaves +/// `auth.json` unchanged; a crash after the rename has the new file +/// already durable. Satisfies the ADR §6.1 refresh-token rotation +/// contract — without rename atomicity, a Spot interruption mid-write +/// would leave a half-written `auth.json` that the next task start would +/// fail to parse, then re-restore from S3 with a now-revoked refresh +/// token. fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { - if let Some(dir) = path.parent() { - std::fs::create_dir_all(dir)?; - } + let dir = path.parent().unwrap_or(Path::new(".")); + std::fs::create_dir_all(dir)?; let data = serde_json::to_string_pretty(map)?; #[cfg(unix)] { - use std::fs::OpenOptions; + use std::fs::{File, OpenOptions}; use std::io::Write as _; use std::os::unix::fs::OpenOptionsExt; - let mut file = OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .mode(0o600) - .open(path)?; - file.write_all(data.as_bytes())?; - file.sync_all()?; + use std::sync::atomic::{AtomicU64, Ordering}; + static TMP_COUNTER: AtomicU64 = AtomicU64::new(0); + let seq = TMP_COUNTER.fetch_add(1, Ordering::Relaxed); + let tmp = dir.join(format!("auth.json.tmp.{}.{seq}", std::process::id())); + let write_and_sync = || -> Result<()> { + let mut file = OpenOptions::new() + .write(true) + .create_new(true) + .mode(0o600) + .open(&tmp)?; + file.write_all(data.as_bytes())?; + file.sync_all()?; + Ok(()) + }; + if let Err(e) = write_and_sync() { + let _ = std::fs::remove_file(&tmp); + return Err(e); + } + if let Err(e) = std::fs::rename(&tmp, path) { + let _ = std::fs::remove_file(&tmp); + return 
Err(e.into()); + } + // fsync the parent dir so the rename itself is durable; without + // this, the inode swap can be reordered after a power loss even + // though the tmp's contents were synced. + if let Ok(dir_handle) = File::open(dir) { + let _ = dir_handle.sync_all(); + } } #[cfg(not(unix))] { From b3a310b00bf8c0037666a0977598ad9da7d5beb6 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 14:22:41 +0000 Subject: [PATCH 95/98] refactor(openab-agent): remove --features mcp flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MCP is now an unconditional dep. Drops the `[features]` block in Cargo.toml, promotes `rmcp` from optional to required, strips every `#[cfg(feature = "mcp")]` / `#[cfg(not(feature = "mcp"))]` gate in src/{acp,agent,auth,main}.rs, and drops the no-mcp combo from the CI matrix (both .github/workflows/ci-openab-agent.yml and the local mirror script). Per ADR `docs/adr/openab-agent-mcp.md` §9 Phase 3 — the intermediate `default = ["mcp"]` step is skipped in favor of going straight to flag removal (decision recorded in tracking issue #966). 
Refs #966 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 --- .github/workflows/ci-openab-agent.yml | 2 -- openab-agent/Cargo.toml | 6 +----- openab-agent/src/acp.rs | 10 +--------- openab-agent/src/agent.rs | 18 +----------------- openab-agent/src/auth.rs | 15 --------------- openab-agent/src/main.rs | 4 ---- 6 files changed, 3 insertions(+), 52 deletions(-) diff --git a/.github/workflows/ci-openab-agent.yml b/.github/workflows/ci-openab-agent.yml index b33d8b613..11d9290ca 100644 --- a/.github/workflows/ci-openab-agent.yml +++ b/.github/workflows/ci-openab-agent.yml @@ -26,9 +26,7 @@ jobs: workspaces: openab-agent - run: cargo fmt --check - run: cargo clippy -- -D warnings - - run: cargo clippy --features mcp -- -D warnings - run: cargo test - - run: cargo test --features mcp - run: cargo test -- --ignored env: ANTHROPIC_API_KEY: "fake-key-for-ci" diff --git a/openab-agent/Cargo.toml b/openab-agent/Cargo.toml index 5b091a47e..00906c88d 100644 --- a/openab-agent/Cargo.toml +++ b/openab-agent/Cargo.toml @@ -21,7 +21,7 @@ getrandom = "0.4.2" urlencoding = "2.1.3" open = "5.3.5" url = "2.5.8" -rmcp = { version = "1.7", default-features = false, optional = true, features = [ +rmcp = { version = "1.7", default-features = false, features = [ "client", "transport-child-process", "transport-streamable-http-client-reqwest", @@ -31,10 +31,6 @@ rmcp = { version = "1.7", default-features = false, optional = true, features = [target.'cfg(unix)'.dependencies] libc = "0.2" -[features] -default = [] -mcp = ["dep:rmcp"] - [dev-dependencies] tempfile = "3" temp-env = "0.3.6" diff --git a/openab-agent/src/acp.rs b/openab-agent/src/acp.rs index 9585612b2..5c119fb6a 100644 --- a/openab-agent/src/acp.rs +++ b/openab-agent/src/acp.rs @@ -1,6 +1,5 @@ use crate::agent::Agent; use crate::llm::AnthropicProvider; -#[cfg(feature = "mcp")] use crate::mcp::{self, McpRuntimeManager}; use serde::{Deserialize, Serialize}; use serde_json::{json, 
Value}; @@ -37,7 +36,6 @@ pub struct AcpServer { // TODO(v0.2): add session TTL and periodic cleanup to prevent OOM sessions: HashMap, working_dir: String, - #[cfg(feature = "mcp")] mcp_manager: Option, } @@ -48,7 +46,6 @@ impl AcpServer { working_dir: std::env::current_dir() .map(|p| p.to_string_lossy().to_string()) .unwrap_or_else(|_| "/tmp".to_string()), - #[cfg(feature = "mcp")] mcp_manager: mcp::load_runtime_or_warn(), } } @@ -160,12 +157,7 @@ impl AcpServer { } }; - let agent = Agent::new_boxed( - provider, - self.working_dir.clone(), - #[cfg(feature = "mcp")] - self.mcp_manager.clone(), - ); + let agent = Agent::new_boxed(provider, self.working_dir.clone(), self.mcp_manager.clone()); self.sessions.insert(session_id.clone(), agent); let resp = JsonRpcResponse { jsonrpc: "2.0", diff --git a/openab-agent/src/agent.rs b/openab-agent/src/agent.rs index 63f240d61..8ac7782d6 100644 --- a/openab-agent/src/agent.rs +++ b/openab-agent/src/agent.rs @@ -1,11 +1,9 @@ use anyhow::Result; -#[cfg(feature = "mcp")] use serde::Deserialize; use std::path::PathBuf; use tracing::{debug, info}; use crate::llm::{ContentBlock, LlmEvent, LlmProvider, Message, ToolDef}; -#[cfg(feature = "mcp")] use crate::mcp::{self, McpRuntimeManager}; use crate::skills; use crate::tools; @@ -20,7 +18,6 @@ You have these tools available: Be direct and concise. Execute tasks immediately rather than explaining what you would do. When you need to understand code, read the relevant files first."#; -#[cfg(feature = "mcp")] const MCP_SYSTEM_PROMPT_APPENDIX: &str = "\n\nAdditional tool:\n\ - mcp: Talk to configured MCP servers. 
Call `mcp(action=\"list_servers\")` \ to see what's configured, then `mcp(action=\"list_tools\", server=...)` to \ @@ -38,7 +35,6 @@ pub struct Agent { working_dir: PathBuf, system_prompt: String, tools: Vec, - #[cfg(feature = "mcp")] mcp_manager: Option, } @@ -52,7 +48,6 @@ impl Agent { working_dir: PathBuf::from(working_dir), system_prompt, tools: tools::tool_definitions(), - #[cfg(feature = "mcp")] mcp_manager: None, } } @@ -60,14 +55,10 @@ impl Agent { pub fn new_boxed( provider: Box, working_dir: String, - #[cfg(feature = "mcp")] mcp_manager: Option, + mcp_manager: Option, ) -> Self { - #[cfg(feature = "mcp")] let has_mcp = mcp_manager.is_some(); - #[cfg(not(feature = "mcp"))] - let has_mcp = false; let system_prompt = Self::build_system_prompt(&working_dir, has_mcp); - #[cfg(feature = "mcp")] let tools = { let mut t = tools::tool_definitions(); if mcp_manager.is_some() { @@ -75,15 +66,12 @@ impl Agent { } t }; - #[cfg(not(feature = "mcp"))] - let tools = tools::tool_definitions(); Self { provider, messages: Vec::new(), working_dir: PathBuf::from(working_dir), system_prompt, tools, - #[cfg(feature = "mcp")] mcp_manager, } } @@ -91,8 +79,6 @@ impl Agent { /// Run the agent with a user prompt, executing tool calls until completion. /// Returns the final text response. fn build_system_prompt(working_dir: &str, mcp_enabled: bool) -> String { - #[cfg(not(feature = "mcp"))] - let _ = mcp_enabled; let wd = std::path::Path::new(working_dir); let agents_md = wd.join("AGENTS.md"); let custom = std::fs::read_to_string(&agents_md).unwrap_or_default(); @@ -103,7 +89,6 @@ impl Agent { format!("{}\n\n---\n\n{}", custom.trim(), SYSTEM_PROMPT) }; - #[cfg(feature = "mcp")] let base = if mcp_enabled { format!("{base}{MCP_SYSTEM_PROMPT_APPENDIX}") } else { @@ -233,7 +218,6 @@ impl Agent { /// the routing here (rather than inside `tools.rs`) lets `tools.rs` stay /// stateless and free of MCP/feature plumbing. 
async fn execute_tool_call(&self, name: &str, input: &serde_json::Value) -> Result { - #[cfg(feature = "mcp")] if name == mcp::MCP_TOOL_NAME { let Some(manager) = self.mcp_manager.as_ref() else { return Err(anyhow::anyhow!( diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 12dee323f..deeeda751 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -59,9 +59,6 @@ impl TokenStore { /// `mcp-pending:`. `token_url` + `provider_name` are snapshotted /// up front so a config edit between init and finish can't redirect the /// token exchange. -/// -/// Unconditionally compiled (not behind `mcp` feature) so a non-mcp build -/// can still parse + round-trip an `auth.json` containing pending entries. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct PendingPasteLogin { pub verifier: String, @@ -197,7 +194,6 @@ fn save_tokens(store: &TokenStore) -> Result<()> { /// overrides. Returns the codex entry for `key = "codex"`, but prefer /// `load_tokens()` for that path — this helper exists for MCP /// server-namespaced lookups (ADR §6.1). -#[cfg(feature = "mcp")] pub fn load_namespaced_token_at(path: &Path, key: &str) -> Result { let map = read_auth_file(path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; @@ -213,19 +209,16 @@ pub fn load_namespaced_token_at(path: &Path, key: &str) -> Result { /// without `$HOME` overrides. Read-modify-write on a single file: callers /// in the same process must serialize themselves (the lifecycle manager /// already does per ADR §5.7). -#[cfg(feature = "mcp")] pub fn save_namespaced_token_at(path: &Path, key: &str, store: &TokenStore) -> Result<()> { let mut map = read_auth_file(path).unwrap_or_default(); map.insert(key.to_string(), AuthEntry::Token(store.clone())); write_auth_file(path, &map) } -#[cfg(feature = "mcp")] const PENDING_PREFIX: &str = "mcp-pending:"; /// `auth.json` key for an in-flight paste-back login (ADR §6.4 namespace). 
/// Single construction site so read/write callers can't drift on the literal. -#[cfg(feature = "mcp")] pub fn pending_key(name: &str) -> String { format!("{PENDING_PREFIX}{name}") } @@ -238,7 +231,6 @@ pub fn pending_key(name: &str) -> String { /// Synchronous filesystem read: the pending map is tiny (~one entry per /// concurrent login), so blocking is trivial and avoids `spawn_blocking` /// overhead — callers may invoke this from an async context directly. -#[cfg(feature = "mcp")] pub fn list_pending_logins_at(path: &Path) -> Vec { let Ok(map) = read_auth_file(path) else { return Vec::new(); @@ -259,7 +251,6 @@ pub fn list_pending_logins_at(path: &Path) -> Vec { /// collide, but a hand-edited file would. `path` is injected so the /// runtime manager can point tests at a tempdir; production callers pass /// `auth_path()`. -#[cfg(feature = "mcp")] pub fn load_pending_login(path: &Path, key: &str) -> Result { let map = read_auth_file(path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; @@ -272,7 +263,6 @@ pub fn load_pending_login(path: &Path, key: &str) -> Result { /// Persist a `PendingPasteLogin` under `mcp-pending:` (ADR §6.4). /// Read-modify-write — same serialization caveat as `save_namespaced_token`. -#[cfg(feature = "mcp")] pub fn save_pending_login(path: &Path, key: &str, val: &PendingPasteLogin) -> Result<()> { let mut map = read_auth_file(path).unwrap_or_default(); map.insert(key.to_string(), AuthEntry::Pending(val.clone())); @@ -281,7 +271,6 @@ pub fn save_pending_login(path: &Path, key: &str, val: &PendingPasteLogin) -> Re /// Remove a pending-login entry (consumed on successful `complete_login`, /// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. 
-#[cfg(feature = "mcp")] pub fn remove_pending_login(path: &Path, key: &str) -> Result<()> { let mut map = match read_auth_file(path) { Ok(m) => m, @@ -838,7 +827,6 @@ mod tests { } } - #[cfg(feature = "mcp")] #[test] fn pending_login_helpers_round_trip_via_injected_path() { // Tempdir path injected directly — no HOME-env shimming, so this @@ -853,7 +841,6 @@ mod tests { assert!(load_pending_login(&path, key).is_err()); } - #[cfg(feature = "mcp")] #[test] fn list_pending_logins_strips_prefix_sorts_and_skips_tokens() { let dir = tempfile::tempdir().unwrap(); @@ -874,7 +861,6 @@ mod tests { assert_eq!(names, vec!["linear".to_string(), "zed-mcp".to_string()]); } - #[cfg(feature = "mcp")] #[test] fn list_pending_logins_returns_empty_on_missing_file() { let dir = tempfile::tempdir().unwrap(); @@ -882,7 +868,6 @@ mod tests { assert!(list_pending_logins_at(&path).is_empty()); } - #[cfg(feature = "mcp")] #[test] fn load_namespaced_token_errors_on_pending_entry() { let dir = tempfile::tempdir().unwrap(); diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index 12d63c053..e4f848155 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -2,7 +2,6 @@ mod acp; mod agent; mod auth; mod llm; -#[cfg(feature = "mcp")] mod mcp; mod skills; mod tools; @@ -25,14 +24,12 @@ enum Commands { provider: AuthProvider, }, /// Inspect / manage configured MCP servers - #[cfg(feature = "mcp")] Mcp { #[command(subcommand)] action: McpAction, }, } -#[cfg(feature = "mcp")] #[derive(Subcommand)] enum McpAction { /// List configured MCP servers (loads global + project mcp.json) @@ -121,7 +118,6 @@ async fn main() { auth::show_status(); } }, - #[cfg(feature = "mcp")] Some(Commands::Mcp { action }) => match action { McpAction::List { resolve } => mcp::cli_list_servers(resolve), McpAction::Status => mcp::cli_show_status().await, From 26c7d87a0fee540023d4b445a6f82fbee7c60243 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 16:02:32 +0000 Subject: [PATCH 
96/98] feat(openab-agent/mcp): circuit-breaker module (Hermes pattern) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `mcp/breaker.rs` per ADR §5.9 and the design decisions resolved in #966: - Q1 (a) Fixed cooldown 3-state breaker (Closed / Open / HalfOpen) - Q2 (a) Single consecutive-failure counter per server - Q3 (c) Lazy / piggyback probe (next call after cooldown = probe) API surface: pub enum Verdict { Allow, AllowProbe, Reject { retry_in_secs } } pub struct ServerBreaker { ... } impl ServerBreaker { pub fn new() -> Self; pub fn check(&self, server: &str) -> Verdict; pub fn record_success(&self, server: &str); pub fn record_failure(&self, server: &str); } Constants: `FAIL_THRESHOLD = 3`, `COOLDOWN = 60s`. Matches Hermes `tools/mcp_tool.py` (lines 1868-1912 + 2480-2510). ADR §5.9 mentions "3 fails in 30s" but Hermes itself uses pure consecutive count with no time window — going Hermes-simple here (any success resets). `#![allow(dead_code)]` carries the API surface until the next slice wires it into `McpRuntimeManager` (per the runbook's stub-without- prod-caller convention). Unit tests cover: unknown-server pass-through, under-threshold pass, threshold-trip, success-resets-count, cooldown-elapsed-allows-probe, probe-failure-rearms-cooldown, probe-success-closes-breaker, per- server isolation, retry_in_secs floor (1s when cooldown almost done). Refs #966 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/breaker.rs | 224 ++++++++++++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 1 + 2 files changed, 225 insertions(+) create mode 100644 openab-agent/src/mcp/breaker.rs diff --git a/openab-agent/src/mcp/breaker.rs b/openab-agent/src/mcp/breaker.rs new file mode 100644 index 000000000..cdb2fe1d9 --- /dev/null +++ b/openab-agent/src/mcp/breaker.rs @@ -0,0 +1,224 @@ +//! Per-server circuit breaker (ADR §5.9). +//! +//! Design decisions (#966): +//! 
- Fixed cooldown 3-state breaker (Closed / Open / HalfOpen) +//! - Single consecutive-failure counter per server (transport-level only — +//! JSON-RPC error responses and tool `isError: true` content do NOT count) +//! - Lazy / piggyback probe: after cooldown elapses the next call becomes +//! the half-open probe (matches Hermes `tools/mcp_tool.py` lines 1868-1912 +//! and 2480-2510) +//! +//! ADR §5.9 mentions "3 fails in 30s" but Hermes itself tracks pure +//! consecutive failures with no time window — going Hermes-simple here. +//! Any success resets the counter. + +#![allow(dead_code)] // wired into McpRuntimeManager in next slice + +use std::collections::HashMap; +use std::sync::Mutex; +use std::time::{Duration, Instant}; + +/// Number of consecutive transport failures that trip the breaker. +pub const FAIL_THRESHOLD: u32 = 3; + +/// Cooldown after the breaker opens before the next probe is allowed. +pub const COOLDOWN: Duration = Duration::from_secs(60); + +/// Outcome of [`ServerBreaker::check`] — the call site uses this to decide +/// whether to short-circuit or proceed (and, if proceeding, whether the +/// upcoming call is a half-open probe). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Verdict { + /// Breaker is `Closed` — call goes through normally. + Allow, + /// Breaker is `HalfOpen` — cooldown elapsed, so this call may proceed as + /// a probe (every check returns this until an outcome is recorded). The + /// next [`record_success`](ServerBreaker::record_success) or + /// [`record_failure`](ServerBreaker::record_failure) decides the state. + AllowProbe, + /// Breaker is `Open` — short-circuit the call with this hint to the + /// caller / LLM. + Reject { retry_in_secs: u64 }, +} + +#[derive(Debug, Default)] +struct Entry { + consecutive_failures: u32, + opened_at: Option<Instant>, +} + +/// Per-server circuit breaker state. Not `Clone` — wraps a `Mutex`, so +/// callers share one instance via `Arc<ServerBreaker>` when they need +/// cross-task access without re-acquiring at the `McpRuntimeManager` level.
+#[derive(Debug, Default)] +pub struct ServerBreaker { + entries: Mutex<HashMap<String, Entry>>, +} + +impl ServerBreaker { + pub fn new() -> Self { + Self::default() + } + + /// Returns the current [`Verdict`] for `server`, sampling [`Instant::now`]; + /// tests drive `check_at` with an injected clock instead. + pub fn check(&self, server: &str) -> Verdict { + self.check_at(server, Instant::now()) + } + + fn check_at(&self, server: &str, now: Instant) -> Verdict { + let entries = self.entries.lock().expect("breaker mutex poisoned"); + let Some(entry) = entries.get(server) else { + return Verdict::Allow; + }; + if entry.consecutive_failures < FAIL_THRESHOLD { + return Verdict::Allow; + } + let Some(opened_at) = entry.opened_at else { + return Verdict::Allow; + }; + let age = now.saturating_duration_since(opened_at); + if age >= COOLDOWN { + Verdict::AllowProbe + } else { + let remaining = COOLDOWN.saturating_sub(age).as_secs().max(1); + Verdict::Reject { + retry_in_secs: remaining, + } + } + } + + /// Reset the breaker for `server` — clears failure count and opened-at + /// timestamp. Call on any unambiguous success (successful tool call, + /// successful connect). + pub fn record_success(&self, server: &str) { + let mut entries = self.entries.lock().expect("breaker mutex poisoned"); + entries.remove(server); + } + + /// Record a transport-level failure for `server`. When the count + /// reaches [`FAIL_THRESHOLD`], stamps the opened-at timestamp so the + /// cooldown clock starts (or re-starts, for half-open probe failures).
+ pub fn record_failure(&self, server: &str) { + self.record_failure_at(server, Instant::now()); + } + + fn record_failure_at(&self, server: &str, now: Instant) { + let mut entries = self.entries.lock().expect("breaker mutex poisoned"); + let entry = entries.entry(server.to_string()).or_default(); + entry.consecutive_failures = entry.consecutive_failures.saturating_add(1); + if entry.consecutive_failures >= FAIL_THRESHOLD { + entry.opened_at = Some(now); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn unknown_server_allows() { + let b = ServerBreaker::new(); + assert_eq!(b.check("foo"), Verdict::Allow); + } + + #[test] + fn under_threshold_allows() { + let b = ServerBreaker::new(); + b.record_failure("foo"); + b.record_failure("foo"); + assert_eq!(b.check("foo"), Verdict::Allow); + } + + #[test] + fn threshold_opens_breaker() { + let b = ServerBreaker::new(); + for _ in 0..FAIL_THRESHOLD { + b.record_failure("foo"); + } + match b.check("foo") { + Verdict::Reject { retry_in_secs } => { + assert!(retry_in_secs > 0 && retry_in_secs <= COOLDOWN.as_secs()); + } + v => panic!("expected Reject, got {v:?}"), + } + } + + #[test] + fn success_resets_count() { + let b = ServerBreaker::new(); + b.record_failure("foo"); + b.record_failure("foo"); + b.record_success("foo"); + b.record_failure("foo"); + assert_eq!(b.check("foo"), Verdict::Allow); + } + + #[test] + fn cooldown_elapsed_allows_probe() { + let b = ServerBreaker::new(); + let t0 = Instant::now(); + for _ in 0..FAIL_THRESHOLD { + b.record_failure_at("foo", t0); + } + assert!(matches!(b.check_at("foo", t0), Verdict::Reject { .. 
})); + let t1 = t0 + COOLDOWN + Duration::from_secs(1); + assert_eq!(b.check_at("foo", t1), Verdict::AllowProbe); + } + + #[test] + fn probe_failure_rearms_cooldown() { + let b = ServerBreaker::new(); + let t0 = Instant::now(); + for _ in 0..FAIL_THRESHOLD { + b.record_failure_at("foo", t0); + } + let t1 = t0 + COOLDOWN + Duration::from_secs(1); + assert_eq!(b.check_at("foo", t1), Verdict::AllowProbe); + b.record_failure_at("foo", t1); + match b.check_at("foo", t1) { + Verdict::Reject { retry_in_secs } => { + assert!(retry_in_secs >= COOLDOWN.as_secs() - 1); + } + v => panic!("expected Reject after probe failure, got {v:?}"), + } + } + + #[test] + fn probe_success_closes_breaker() { + let b = ServerBreaker::new(); + let t0 = Instant::now(); + for _ in 0..FAIL_THRESHOLD { + b.record_failure_at("foo", t0); + } + let t1 = t0 + COOLDOWN + Duration::from_secs(1); + assert_eq!(b.check_at("foo", t1), Verdict::AllowProbe); + b.record_success("foo"); + assert_eq!(b.check_at("foo", t1), Verdict::Allow); + } + + #[test] + fn per_server_isolation() { + let b = ServerBreaker::new(); + for _ in 0..FAIL_THRESHOLD { + b.record_failure("foo"); + } + assert!(matches!(b.check("foo"), Verdict::Reject { .. })); + assert_eq!(b.check("bar"), Verdict::Allow); + } + + #[test] + fn retry_in_secs_floor_is_one() { + let b = ServerBreaker::new(); + let t0 = Instant::now(); + for _ in 0..FAIL_THRESHOLD { + b.record_failure_at("foo", t0); + } + let t_almost = t0 + COOLDOWN - Duration::from_millis(10); + match b.check_at("foo", t_almost) { + Verdict::Reject { retry_in_secs } => assert_eq!(retry_in_secs, 1), + v => panic!("expected Reject, got {v:?}"), + } + } +} diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index bff844b19..3e610f96f 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,5 +1,6 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. 
+pub mod breaker; pub mod config; pub mod flow; pub mod meta_tool; From 6772a51eb71d88fc60c626ad1cc426ffef2eb2a3 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 16:17:25 +0000 Subject: [PATCH 97/98] feat(openab-agent/mcp): wire circuit breaker into runtime + meta_tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Connects the per-server `ServerBreaker` introduced in 26c7d87 to the `McpRuntimeManager` connect path and the `meta_tool::call_tool` dispatch path (ADR §5.9 + #966 design decisions). - `connect()`: check breaker after the already-connected fast path; `Reject` short-circuits with a `retry in {n}s` hint. NeedsAuth bounces (missing/expired token, refresh failure) do NOT touch the breaker — those are auth-level, not transport-level. - Dial outcome at the tail of `connect()` records success/failure on the breaker, matching the single-counter / transport-only model resolved in #966 Q2. - `meta_tool::call_tool` wraps `peer.call_tool()`: wire-level `Err` = transport failure → trips the breaker; wire-level `Ok` (regardless of `CallToolResult.is_error`) resets it. `isError: true` content is protocol-normal payload, not a transport fault. - New `record_tool_call_outcome(name, ok)` is the only public surface the meta_tool path uses; the breaker stays private inside the manager. Tests: 2 new integration tests in `mcp::runtime::tests` — breaker_opens_after_threshold_consecutive_connect_failures (dead-port pattern, hermetic) and breaker_does_not_count_oauth_needs_auth_bounces (NeedsAuth isolation). Existing 9 breaker unit tests still pass. 
--- openab-agent/src/mcp/breaker.rs | 2 - openab-agent/src/mcp/meta_tool.rs | 17 +++++-- openab-agent/src/mcp/runtime.rs | 82 +++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 6 deletions(-) diff --git a/openab-agent/src/mcp/breaker.rs b/openab-agent/src/mcp/breaker.rs index cdb2fe1d9..9d31e6cb1 100644 --- a/openab-agent/src/mcp/breaker.rs +++ b/openab-agent/src/mcp/breaker.rs @@ -12,8 +12,6 @@ //! consecutive failures with no time window — going Hermes-simple here. //! Any success resets the counter. -#![allow(dead_code)] // wired into McpRuntimeManager in next slice - use std::collections::HashMap; use std::sync::Mutex; use std::time::{Duration, Instant}; diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index e9bd69806..7c6c64f81 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -93,10 +93,19 @@ async fn call_tool( .with_context(|| format!("connect mcp server {server:?}"))?; let peer = manager.arc_peer(server).await?; let params = rmcp::model::CallToolRequestParams::new(tool.to_string()).with_arguments(args_map); - let result = peer - .call_tool(params) - .await - .with_context(|| format!("call_tool {tool:?} on {server:?}"))?; + // Wire-level Err = transport failure → trips the breaker; wire-level + // Ok (even with `isError: true`) resets it. See ADR §5.9 / #966 Q2. 
+ let result = match peer.call_tool(params).await { + Ok(r) => { + manager.record_tool_call_outcome(server, true); + r + } + Err(e) => { + manager.record_tool_call_outcome(server, false); + return Err(anyhow::Error::new(e)) + .with_context(|| format!("call_tool {tool:?} on {server:?}")); + } + }; serde_json::to_value(&result).context("serialize CallToolResult") } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 870e04e95..70189b133 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -25,6 +25,7 @@ use tokio::process::Command; use tokio::sync::RwLock; use tokio::task::AbortHandle; +use super::breaker::{ServerBreaker, Verdict}; use super::config::{McpConfig, ServerConfig}; use super::flow::{init_paste_authorize, parse_paste_callback}; use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; @@ -121,6 +122,11 @@ pub struct McpRuntimeManager { /// observe the winner's rotated token instead of replaying a stale /// refresh_token (which providers like Google would cascade-revoke). refresh_locks: Arc>>>>, + /// Per-server circuit breaker (ADR §5.9). Counts consecutive + /// transport-level failures; once tripped, short-circuits `connect` + /// and tool-call dispatch until the cooldown elapses and a + /// half-open probe succeeds. + breaker: Arc, } impl McpRuntimeManager { @@ -147,6 +153,7 @@ impl McpRuntimeManager { auth_path, device_login_tasks: Arc::new(StdMutex::new(HashMap::new())), refresh_locks: Arc::new(StdMutex::new(HashMap::new())), + breaker: Arc::new(ServerBreaker::new()), } } @@ -638,6 +645,16 @@ impl McpRuntimeManager { if matches!(handle.status, ServerStatus::Connected) && handle.client.is_some() { return Ok(()); } + // Breaker check after the already-connected fast path so the + // hot tool-call path stays lock-free on the breaker map. 
Auth + // bounces below (`NeedsAuth`) don't increment the breaker — + // only the dial result at the end does, matching the "transport + // failures only" semantics from ADR §5.9 / Hermes. + if let Verdict::Reject { retry_in_secs } = self.breaker.check(name) { + return Err(anyhow!( + "mcp server {name:?} circuit-breaker open — retry in {retry_in_secs}s" + )); + } let resolved = handle.config.resolved(name)?; let plan = match resolved { ServerConfig::Stdio { @@ -714,15 +731,32 @@ impl McpRuntimeManager { Ok(client) => { handle.status = ServerStatus::Connected; handle.client = Some(Arc::new(client)); + self.breaker.record_success(name); Ok(()) } Err(e) => { let msg = format!("{e:#}"); handle.status = ServerStatus::Failed(msg.clone()); + self.breaker.record_failure(name); Err(anyhow!(msg)) } } } + + /// Record a tool-call outcome on the breaker. Called from + /// `meta_tool::call_tool` after `peer.call_tool().await` returns. + /// Wire-level `Ok` resets the counter regardless of `CallToolResult.is_error` + /// (the `isError` bit is protocol-normal payload, not a transport fault). + /// Wire-level `Err` is a transport-level failure and increments the + /// counter — matching the single-counter / transport-only model from + /// the #966 design decisions. + pub fn record_tool_call_outcome(&self, name: &str, ok: bool) { + if ok { + self.breaker.record_success(name); + } else { + self.breaker.record_failure(name); + } + } } /// Stringified provider name for the pending-state record. `Builtin` keeps @@ -1169,6 +1203,54 @@ mod tests { } } + #[tokio::test] + async fn breaker_opens_after_threshold_consecutive_connect_failures() { + // 127.0.0.1:1 hermetic dead-port (same pattern as the test above). + // After FAIL_THRESHOLD dial failures the breaker trips, and the + // next connect() short-circuits with the cooldown hint instead of + // attempting another dial. 
+ let json = r#"{ + "mcpServers": { + "dead": { "type": "http", "url": "http://127.0.0.1:1/mcp" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + for _ in 0..crate::mcp::breaker::FAIL_THRESHOLD { + assert!(mgr.connect("dead").await.is_err()); + } + let err = mgr.connect("dead").await.unwrap_err().to_string(); + assert!( + err.contains("circuit-breaker open"), + "expected breaker hint in {err}" + ); + assert!(err.contains("retry in"), "expected retry hint in {err}"); + } + + #[tokio::test] + async fn breaker_does_not_count_oauth_needs_auth_bounces() { + // NeedsAuth is an auth-level state, not a transport-level failure; + // the breaker must NOT trip after repeated NeedsAuth returns. + let json = r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + for _ in 0..(crate::mcp::breaker::FAIL_THRESHOLD + 2) { + let err = mgr.connect("linear").await.unwrap_err().to_string(); + assert!( + err.contains("needs oauth login"), + "expected NeedsAuth bounce, got {err}" + ); + } + } + // start_paste_login + builtin_client_id race on the same OS env var — // `set_var` is unsound under concurrent reads, so serialize them. static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); From bf627323d7a3a51de00a55b6808d89f40edf47c2 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 16:25:07 +0000 Subject: [PATCH 98/98] =?UTF-8?q?feat(openab-agent/mcp):=20mcp=20doctor=20?= =?UTF-8?q?CLI=20subcommand=20(ADR=20=C2=A78)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the Phase 3 scope from #966 — interactive diagnostic that runs a live `connect()` against every configured server, with three checks per server (short-circuit on first ✗): 1. 
Config resolution — `${env:VAR}` placeholders resolve against the live process env. Missing vars are flagged with the offending name and a "set the var(s)" hint. 2. OAuth state (HTTP + `oauth:` only) — cached `TokenStore` in `auth.json`. Missing → `mcp login ` hint; expired → noted but not fatal (connect will attempt refresh). 3. Live connect — `manager.connect(name).await`. Errors surface verbatim, including the §5.9 circuit-breaker `retry in {n}s` hint when the breaker is open. Exits non-zero when any server fails so CI / scripts can wrap with `openab-agent mcp doctor || alert`. Reuses the existing `McpRuntimeManager` and `load_namespaced_token_at` primitives — no new runtime state, no new fields on `McpRuntimeManager`. Wired into the existing `mcp` subcommand dispatch in main.rs alongside `list` / `status` / `connect` / `login`. --- openab-agent/src/main.rs | 5 ++ openab-agent/src/mcp/mod.rs | 92 +++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index e4f848155..17578364d 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -42,6 +42,10 @@ enum McpAction { }, /// Show per-server runtime status Status, + /// Diagnose each configured server end-to-end: env vars, OAuth token, + /// live connect. Prints actionable remediation hints and exits + /// non-zero on any server failure (ADR §8). + Doctor, /// Spawn the configured server and run the MCP handshake (smoke-test). 
Connect { /// Server name as configured in mcp.json @@ -121,6 +125,7 @@ async fn main() { Some(Commands::Mcp { action }) => match action { McpAction::List { resolve } => mcp::cli_list_servers(resolve), McpAction::Status => mcp::cli_show_status().await, + McpAction::Doctor => mcp::cli_doctor().await, McpAction::Connect { name } => mcp::cli_connect(name).await, McpAction::Login { name, diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 3e610f96f..a47385da9 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -9,6 +9,7 @@ pub mod runtime; use serde_json::json; +use crate::auth::{auth_path, load_namespaced_token_at}; use crate::llm::ToolDef; use config::{McpConfig, ServerConfig}; @@ -295,6 +296,97 @@ pub async fn cli_login_device(name: String) { } } +/// `openab-agent mcp doctor`. Per-server diagnostic that runs a live +/// `connect()` against every configured server and surfaces the result +/// plus a remediation hint when something's broken (ADR §8). +/// +/// Per-server checks (run in order, short-circuit on first ✗): +/// 1. **Config resolution** — `${env:VAR}` placeholders resolve against +/// the live process env. Missing vars print the offending name + +/// hint to set it. +/// 2. **OAuth state** (HTTP + `oauth:` only) — cached `TokenStore` in +/// `auth.json`. Missing → `mcp login ` hint; expired → noted +/// but not fatal (connect will attempt refresh). +/// 3. **Live connect** — `manager.connect(name).await`. Any error is +/// surfaced verbatim (including the circuit-breaker `retry in {n}s` +/// hint from §5.9 when the breaker is open). +/// +/// Exits non-zero if any server fails diagnostic, so CI / scripts can +/// `openab-agent mcp doctor || alert`. 
+pub async fn cli_doctor() { + let cfg = load_config_or_exit(); + if cfg.servers.is_empty() { + println!("No MCP servers configured."); + println!(" global: ~/.openab/agent/mcp.json"); + println!(" project: ./.openab/agent/mcp.json"); + return; + } + let manager = McpRuntimeManager::from_config(cfg.clone()); + let auth = auth_path(); + let mut servers: Vec<_> = cfg.servers.iter().collect(); + servers.sort_by_key(|(name, _)| *name); + let mut failed = 0usize; + for (name, server) in &servers { + println!(); + println!("● {name} ({})", server.transport_label()); + if !doctor_server(&manager, &auth, name, server).await { + failed += 1; + } + } + println!(); + if failed == 0 { + println!("✓ all {} server(s) healthy", servers.len()); + } else { + println!( + "✗ {failed} of {} server(s) failed diagnostic", + servers.len() + ); + std::process::exit(1); + } +} + +/// Returns `true` if every check passed for this server, `false` on the +/// first failure (subsequent checks are skipped to keep the report focused +/// on the root cause). +async fn doctor_server( + manager: &McpRuntimeManager, + auth: &std::path::Path, + name: &str, + server: &ServerConfig, +) -> bool { + if let Err(e) = server.resolved(name) { + println!(" ✗ config: {e:#}"); + println!(" → set the missing env var(s) above and re-run"); + return false; + } + println!(" ✓ config: env vars resolved"); + if let ServerConfig::Http { oauth: Some(_), .. 
} = server { + match load_namespaced_token_at(auth, name) { + Ok(store) if !store.is_expired() => { + println!(" ✓ oauth: valid token cached"); + } + Ok(_) => { + println!(" ⚠ oauth: token expired (connect will attempt refresh)"); + } + Err(_) => { + println!(" ✗ oauth: no token cached"); + println!(" → run `openab-agent mcp login {name}`"); + return false; + } + } + } + match manager.connect(name).await { + Ok(()) => { + println!(" ✓ connect: handshake succeeded"); + true + } + Err(e) => { + println!(" ✗ connect: {e:#}"); + false + } + } +} + fn read_redirect_from_stdin() -> std::io::Result { use std::io::Write; print!("Paste the FULL redirect URL: ");