From 5f82d2b8004854f604ba5c78c1fda433f71bd328 Mon Sep 17 00:00:00 2001 From: jinku Date: Mon, 11 May 2026 11:26:31 -0700 Subject: [PATCH 1/4] feat: default Claude review/task model to opus-4.7[1m] + xhigh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply opus + xhigh effort as the global default whenever the user does not pass --model or --effort to review, adversarial-review, or task. The opus and sonnet aliases now resolve to their 1M-context variants (claude-opus-4-7[1m], claude-sonnet-4-6[1m]); haiku stays on the standard claude-haiku-4-5. Per-model effort defaults: xhigh for opus, high for sonnet, unset for haiku. xhigh is promoted to a first-class VALID_EFFORTS member and is no longer aliased to max — max is reserved for users who explicitly request it. Also expose --effort on review and adversarial-review so callers can override the model-derived default, and update SKILL.md, README, and the cli-runtime internal reference accordingly. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 13 +- internal-skills/cli-runtime/runtime.md | 3 +- scripts/claude-companion.mjs | 39 ++++-- scripts/lib/claude-cli.mjs | 103 +++++++++++++- skills/adversarial-review/SKILL.md | 4 +- skills/rescue/SKILL.md | 4 +- skills/review/SKILL.md | 4 +- tests/claude-cli.test.mjs | 181 +++++++++++++++++++++++-- 8 files changed, 314 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 36ccd93..bf39c4e 100644 --- a/README.md +++ b/README.md @@ -128,14 +128,17 @@ Quick routing rule: Standard read-only review of your current work. ```text -$cc:review # review uncommitted changes +$cc:review # review uncommitted changes (default: opus + xhigh effort) $cc:review --base main # review branch vs main $cc:review --scope branch # explicitly compare branch tip to base $cc:review --background # run in background, check with $cc:status later -$cc:review --model sonnet # use a specific Claude model +$cc:review --model sonnet # switch to sonnet (defaults to high effort) +$cc:review --model opus --effort high # opus with a lighter effort ``` -**Flags:** `--base `, `--scope `, `--wait`, `--background`, `--model ` +**Flags:** `--base `, `--scope `, `--wait`, `--background`, `--model `, `--effort ` + +**Defaults:** model `opus` (resolves to `claude-opus-4-7[1m]`, the 1M-context variant) with `xhigh` effort. If you pick `sonnet`, it resolves to `claude-sonnet-4-6[1m]` (also 1M context) and the default effort drops to `high`. `haiku` resolves to `claude-haiku-4-5` and has no effort setting. Pass `--model` and `--effort` to override. Scope `auto` (the default) inspects `git status` and chooses between working-tree and branch automatically. @@ -179,8 +182,8 @@ $cc:rescue --model sonnet --effort medium investigate the flaky test | `--resume-last` | Alias for `--resume` | | `--fresh` | Force a new task (don't resume) | | `--write` | Allow file edits (default) | -| `--model ` | Claude model (`sonnet`, `haiku`, or full ID) | -| `--effort ` | Reasoning effort: `low`, `medium`, `high`, `max` | +| `--model ` | Claude model (`opus`, `sonnet`, `haiku`, or full ID; defaults to `opus`. The `opus` and `sonnet` aliases resolve to their 1M-context variants `claude-opus-4-7[1m]` and `claude-sonnet-4-6[1m]`.) | +| `--effort ` | Reasoning effort: `low`, `medium`, `high`, `xhigh`, `max` (default: `xhigh` for opus, `high` for sonnet, unset for haiku) | | `--prompt-file ` | Read task description from a file | **Resume behavior:** If you don't pass `--resume` or `--fresh`, rescue checks for a resumable Claude session and asks once whether to continue or start fresh. Your phrasing guides the recommendation — "continue the last run" → resume, "start over" → fresh. diff --git a/internal-skills/cli-runtime/runtime.md b/internal-skills/cli-runtime/runtime.md index 77b78d3..c638ec2 100644 --- a/internal-skills/cli-runtime/runtime.md +++ b/internal-skills/cli-runtime/runtime.md @@ -24,8 +24,7 @@ Command selection: Routing controls: - Treat `--model`, `--effort`, `--resume`, `--resume-last`, `--fresh`, `--prompt-file`, `--view-state`, `--owner-session-id`, and `--job-id` as routing controls, not task text. -- Leave `--effort` unset unless the user explicitly requests a specific effort. -- Leave model unset by default. Add `--model` only when the user explicitly asks for one. +- Leave `--model` and `--effort` unset unless the user explicitly asks for a specific model or effort. The companion command applies these defaults itself: model defaults to `opus`, effort defaults to `xhigh` for opus, `high` for sonnet, and is left unset for haiku. - `--view-state on-success` means the user will see this companion result in the current turn, so the companion may mark it viewed on success. - `--view-state defer` means the parent is not waiting, so the companion must leave the result unread until the user explicitly checks it. - `--owner-session-id ` is an internal parent-session routing control. Preserve it when present so tracked jobs remain visible to the parent session's `$cc:status` / `$cc:result`. diff --git a/scripts/claude-companion.mjs b/scripts/claude-companion.mjs index 9e5df0e..e34fa25 100644 --- a/scripts/claude-companion.mjs +++ b/scripts/claude-companion.mjs @@ -10,9 +10,11 @@ * * Adapted from codex-companion.mjs: * - Uses claude-cli.mjs instead of app-server/broker - * - MODEL_ALIASES: sonnet -> claude-sonnet-4-6, haiku -> claude-haiku-4-5 - * - Claude CLI effort values: low, medium, high, max - * - Legacy effort aliases: none|minimal -> low, xhigh -> max + * - MODEL_ALIASES: opus -> claude-opus-4-7[1m], sonnet -> claude-sonnet-4-6[1m], haiku -> claude-haiku-4-5 + * - Default model when --model is unset: opus + * - Default effort by model: opus -> xhigh, sonnet -> high, haiku -> unset + * - Claude CLI effort values: low, medium, high, xhigh, max + * - Legacy effort aliases: none|minimal -> low * - Review gate matches upstream setup semantics: Stop hook runs when enabled * * Subcommands: @@ -37,6 +39,8 @@ import { cancelClaudeProcess, MODEL_ALIASES, resolveEffort, + resolveDefaultModel, + resolveDefaultEffort, SANDBOX_READ_ONLY_TOOLS, createSandboxSettings, cleanupSandboxSettings, @@ -112,9 +116,9 @@ function printUsage() { [ "Usage:", " node scripts/claude-companion.mjs setup [--enable-review-gate|--disable-review-gate] [--json]", - " node scripts/claude-companion.mjs review [--wait|--background] [--base ] [--scope ]", - " node scripts/claude-companion.mjs adversarial-review [--wait|--background] [--base ] [--scope ] [focus text]", - " node scripts/claude-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", + " node scripts/claude-companion.mjs review [--wait|--background] [--base ] [--scope ] [--model ] [--effort ]", + " node scripts/claude-companion.mjs adversarial-review [--wait|--background] [--base ] [--scope ] [--model ] [--effort ] [focus text]", + " node scripts/claude-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", " node scripts/claude-companion.mjs status [job-id] [--all] [--json]", " node scripts/claude-companion.mjs result [job-id] [--json]", " node scripts/claude-companion.mjs cancel [job-id] [--json]", @@ -478,6 +482,7 @@ async function executeReviewRun(request) { try { result = await runClaudeReview(request.cwd, prompt, { model: request.model, + effort: request.effort, onProgress: request.onProgress, onSpawn: request.onSpawn, permissionMode: "dontAsk", @@ -536,6 +541,7 @@ async function executeReviewRun(request) { schema, { model: request.model, + effort: request.effort, onProgress: request.onProgress, onSpawn: request.onSpawn, permissionMode: "dontAsk", @@ -825,11 +831,12 @@ function buildReviewRequest({ base, scope, model, + effort, focusText, reviewName, markViewedOnSuccess }) { - return { cwd, base, scope, model, focusText, reviewName, markViewedOnSuccess }; + return { cwd, base, scope, model, effort, focusText, reviewName, markViewedOnSuccess }; } function spawnDetachedReviewWorker(cwd, jobId) { @@ -1183,7 +1190,7 @@ async function resolveLatestResumableSession(cwd, options = {}) { async function handleReviewCommand(argv, config) { const { options, positionals } = parseCommandInput(argv, { - valueOptions: ["base", "scope", "model", "cwd", "view-state", "job-id", "owner-session-id"], + valueOptions: ["base", "scope", "model", "effort", "cwd", "view-state", "job-id", "owner-session-id"], booleanOptions: ["json", "background", "wait"], aliasMap: { m: "model" @@ -1205,6 +1212,10 @@ async function handleReviewCommand(argv, config) { Boolean(options.background) ); + const requestedModel = normalizeRequestedModel(options.model); + const resolvedModel = resolveDefaultModel(requestedModel); + const resolvedEffort = resolveDefaultEffort(resolvedModel, options.effort); + await withReleasedReservation(workspaceRoot, explicitJobId, async () => { // Validate inside the reservation guard so failures do not leak markers. config.validateRequest?.(target, focusText); @@ -1227,7 +1238,8 @@ async function handleReviewCommand(argv, config) { cwd, base: options.base, scope: options.scope, - model: options.model, + model: resolvedModel, + effort: resolvedEffort, focusText, reviewName: config.reviewName, markViewedOnSuccess @@ -1248,7 +1260,8 @@ async function handleReviewCommand(argv, config) { cwd, base: options.base, scope: options.scope, - model: options.model, + model: resolvedModel, + effort: resolvedEffort, focusText, reviewName: config.reviewName, onProgress: progress, @@ -1300,8 +1313,10 @@ async function handleTask(argv) { const cwd = resolveCommandCwd(options); const workspaceRoot = resolveCommandWorkspace(options); - const model = normalizeRequestedModel(options.model); - const effort = resolveEffort(options.effort) ?? null; + const requestedModel = normalizeRequestedModel(options.model); + const model = resolveDefaultModel(requestedModel); + const resolvedEffort = resolveDefaultEffort(model, options.effort); + const effort = resolvedEffort ? resolveEffort(resolvedEffort) : null; const prompt = readTaskPrompt(cwd, options, positionals); const markViewedOnSuccess = resolveMarkViewedOnSuccess( options["view-state"], diff --git a/scripts/lib/claude-cli.mjs b/scripts/lib/claude-cli.mjs index 3cff9f7..770079a 100644 --- a/scripts/lib/claude-cli.mjs +++ b/scripts/lib/claude-cli.mjs @@ -16,6 +16,16 @@ import { normalizePathSlashes, resolvePluginRuntimeRoot } from "./codex-paths.mj import { getProcessIdentity, validateProcessIdentity } from "./process.mjs"; const CLAUDE_BIN = "claude"; +const CLAUDE_LOCAL_OVERRIDE_ENV_KEYS = [ + "ANTHROPIC_BASE_URL", + "ANTHROPIC_API_BASE_URL", + "HTTP_PROXY", + "HTTPS_PROXY", + "ALL_PROXY", + "http_proxy", + "https_proxy", + "all_proxy", +]; export const MAX_STREAM_PARSER_UNKNOWN_EVENTS = 50; export const MAX_STREAM_PARSER_PARSE_ERRORS = 50; export const MAX_STREAM_PARSER_TOOL_USES = 256; @@ -71,6 +81,66 @@ function appendTextTail(existing, chunk, maxBytes) { return sliceTextTailByBytes(next, maxBytes); } +function resolveClaudeSettingsPath(settingsPath = null) { + if (settingsPath) { + return settingsPath; + } + const claudeConfigDir = process.env.CLAUDE_CONFIG_DIR || path.join(os.homedir(), ".claude"); + return path.join(claudeConfigDir, "settings.json"); +} + +function loadClaudeConfiguredEnv(settingsPath = null) { + const resolvedPath = resolveClaudeSettingsPath(settingsPath); + try { + if (!fs.existsSync(resolvedPath)) { + return {}; + } + const parsed = JSON.parse(fs.readFileSync(resolvedPath, "utf8")); + if (parsed?.env && typeof parsed.env === "object" && !Array.isArray(parsed.env)) { + return parsed.env; + } + return {}; + } catch { + return null; + } +} + +function isLoopbackUrl(rawValue) { + if (!rawValue || typeof rawValue !== "string") { + return false; + } + try { + const parsed = new URL(rawValue); + return new Set(["127.0.0.1", "localhost", "::1", "[::1]"]).has(parsed.hostname); + } catch { + return false; + } +} + +export function buildClaudeSpawnEnv(baseEnv = process.env, options = {}) { + const nextEnv = { ...baseEnv }; + const configuredEnv = Object.prototype.hasOwnProperty.call(options, "configuredEnv") + ? options.configuredEnv + : loadClaudeConfiguredEnv(options.settingsPath ?? null); + + // If the Claude settings file is unreadable, preserve the inherited env. + if (configuredEnv == null) { + return nextEnv; + } + + for (const key of CLAUDE_LOCAL_OVERRIDE_ENV_KEYS) { + const value = nextEnv[key]; + if (!value || Object.prototype.hasOwnProperty.call(configuredEnv, key)) { + continue; + } + if (isLoopbackUrl(value)) { + delete nextEnv[key]; + } + } + + return nextEnv; +} + // --------------------------------------------------------------------------- // Availability & Auth // --------------------------------------------------------------------------- @@ -435,17 +505,43 @@ export function cleanupSandboxSettings(filePath) { // --------------------------------------------------------------------------- export const MODEL_ALIASES = new Map([ - ["sonnet", "claude-sonnet-4-6"], + ["opus", "claude-opus-4-7[1m]"], + ["sonnet", "claude-sonnet-4-6[1m]"], ["haiku", "claude-haiku-4-5"], ]); export const EFFORT_ALIASES = { none: "low", minimal: "low", - xhigh: "max", }; -export const VALID_EFFORTS = new Set(["low", "medium", "high", "max"]); +export const VALID_EFFORTS = new Set(["low", "medium", "high", "xhigh", "max"]); + +export const DEFAULT_MODEL = "opus"; + +export const DEFAULT_EFFORT_BY_MODEL = new Map([ + ["opus", "xhigh"], + ["claude-opus-4-7", "xhigh"], + ["claude-opus-4-7[1m]", "xhigh"], + ["sonnet", "high"], + ["claude-sonnet-4-6", "high"], + ["claude-sonnet-4-6[1m]", "high"], +]); + +export function resolveDefaultModel(model) { + if (model == null || String(model).trim() === "") { + return DEFAULT_MODEL; + } + return model; +} + +export function resolveDefaultEffort(model, effort) { + if (effort != null && String(effort).trim() !== "") { + return effort; + } + const key = String(model ?? "").trim().toLowerCase(); + return DEFAULT_EFFORT_BY_MODEL.get(key); +} export function resolveModel(model) { if (!model) return undefined; @@ -543,6 +639,7 @@ export async function runClaudeTurn(cwd, prompt, options = {}) { cwd, detached: true, // new process group for safe cancellation stdio: ["ignore", "pipe", "pipe"], // stdin ignored — prompt is passed as CLI arg + env: buildClaudeSpawnEnv(process.env), }); let pidIdentity = null; diff --git a/skills/adversarial-review/SKILL.md b/skills/adversarial-review/SKILL.md index bfe3f12..e1b22cb 100644 --- a/skills/adversarial-review/SKILL.md +++ b/skills/adversarial-review/SKILL.md @@ -1,6 +1,6 @@ --- name: adversarial-review -description: 'Run a design-challenging Claude Code review of local git changes in this repository. Args: --wait, --background, --base , --scope , --model , [focus text]. Use only when the user wants stronger scrutiny than a normal review, such as explicit tradeoff challenge, risky-change review, or custom focus text.' +description: 'Run a design-challenging Claude Code review of local git changes in this repository. Args: --wait, --background, --base , --scope , --model , --effort , [focus text]. Defaults to opus + xhigh effort. Use only when the user wants stronger scrutiny than a normal review, such as explicit tradeoff challenge, risky-change review, or custom focus text.' --- # Claude Code Adversarial Review @@ -16,7 +16,7 @@ Unlike `$cc:review`, this skill accepts custom focus text after the flags. The m Do not derive the companion path from this skill file or any cache directory. Always run the installed copy: `node "/scripts/claude-companion.mjs" adversarial-review ...` -Supported arguments: `--wait`, `--background`, `--base `, `--scope auto|working-tree|branch`, `--model `, plus optional focus text after the flags +Supported arguments: `--wait`, `--background`, `--base `, `--scope auto|working-tree|branch`, `--model `, `--effort `, plus optional focus text after the flags (defaults: model=opus, effort=xhigh; sonnet defaults to high; haiku has no effort) Raw slash-command arguments: `$ARGUMENTS` diff --git a/skills/rescue/SKILL.md b/skills/rescue/SKILL.md index cd160ca..c8b1c5b 100644 --- a/skills/rescue/SKILL.md +++ b/skills/rescue/SKILL.md @@ -1,6 +1,6 @@ --- name: rescue -description: 'Delegate a substantial diagnosis, implementation, or follow-up task to Claude Code through the tracked-job runtime. Args: --background, --wait, --resume, --resume-last, --fresh, --write, --model , --effort , --prompt-file , [task text]. Use when Claude should investigate or change things, not when the user only wants review findings.' +description: 'Delegate a substantial diagnosis, implementation, or follow-up task to Claude Code through the tracked-job runtime. Args: --background, --wait, --resume, --resume-last, --fresh, --write, --model , --effort , --prompt-file , [task text]. Defaults to opus + xhigh effort. Use when Claude should investigate or change things, not when the user only wants review findings.' --- # Claude Code Rescue @@ -22,7 +22,7 @@ Do not derive the companion path from this skill file or any cache directory. Al Raw slash-command arguments: `$ARGUMENTS` -Supported arguments: `--background`, `--wait`, `--resume`, `--resume-last`, `--fresh`, `--write`, `--model `, `--effort `, `--prompt-file `, plus free-text task text +Supported arguments: `--background`, `--wait`, `--resume`, `--resume-last`, `--fresh`, `--write`, `--model `, `--effort `, `--prompt-file `, plus free-text task text Main-thread routing rules: - If the user explicitly invoked `$cc:rescue` or `Claude Code Rescue`, do not keep the work in the main Codex thread. Delegate it. diff --git a/skills/review/SKILL.md b/skills/review/SKILL.md index 0350478..a2d05b1 100644 --- a/skills/review/SKILL.md +++ b/skills/review/SKILL.md @@ -1,6 +1,6 @@ --- name: review -description: 'Run a standard Claude Code review of local git changes in this repository. Args: --wait, --background, --base , --scope , --model . Use as the default path for ordinary code-review requests when the user did not explicitly ask for stronger adversarial scrutiny or for Claude to own the implementation work.' +description: 'Run a standard Claude Code review of local git changes in this repository. Args: --wait, --background, --base , --scope , --model , --effort . Defaults to opus + xhigh effort. Use as the default path for ordinary code-review requests when the user did not explicitly ask for stronger adversarial scrutiny or for Claude to own the implementation work.' --- # Claude Code Review @@ -16,7 +16,7 @@ If the overall request is "you review it too, also ask Claude to review in the b Do not derive the companion path from this skill file or any cache directory. Always run the installed copy: `node "/scripts/claude-companion.mjs" review ...` -Supported arguments: `--wait`, `--background`, `--base `, `--scope auto|working-tree|branch`, `--model ` +Supported arguments: `--wait`, `--background`, `--base `, `--scope auto|working-tree|branch`, `--model `, `--effort ` (defaults: model=opus, effort=xhigh; sonnet defaults to high; haiku has no effort) Raw slash-command arguments: `$ARGUMENTS` diff --git a/tests/claude-cli.test.mjs b/tests/claude-cli.test.mjs index 5aea6f1..7ef679a 100644 --- a/tests/claude-cli.test.mjs +++ b/tests/claude-cli.test.mjs @@ -10,10 +10,15 @@ import { validateTurnCompletion, resolveModel, resolveEffort, + resolveDefaultModel, + resolveDefaultEffort, buildArgs, + buildClaudeSpawnEnv, MODEL_ALIASES, EFFORT_ALIASES, VALID_EFFORTS, + DEFAULT_MODEL, + DEFAULT_EFFORT_BY_MODEL, SANDBOX_READ_ONLY_BASH_TOOLS, SANDBOX_READ_ONLY_TOOLS, SANDBOX_TEMP_DIR, @@ -423,8 +428,8 @@ describe("validateTurnCompletion", () => { // =========================================================================== describe("resolveModel", () => { - it("maps 'sonnet' to 'claude-sonnet-4-6'", () => { - assert.equal(resolveModel("sonnet"), "claude-sonnet-4-6"); + it("maps 'sonnet' to the 1M variant 'claude-sonnet-4-6[1m]'", () => { + assert.equal(resolveModel("sonnet"), "claude-sonnet-4-6[1m]"); }); it("maps 'haiku' to 'claude-haiku-4-5'", () => { @@ -444,13 +449,87 @@ describe("resolveModel", () => { assert.equal(resolveModel(""), undefined); }); + it("maps 'opus' to the 1M variant 'claude-opus-4-7[1m]'", () => { + assert.equal(resolveModel("opus"), "claude-opus-4-7[1m]"); + }); + it("MODEL_ALIASES map has expected entries", () => { - assert.equal(MODEL_ALIASES.size, 2); + assert.equal(MODEL_ALIASES.size, 3); + assert.ok(MODEL_ALIASES.has("opus")); assert.ok(MODEL_ALIASES.has("sonnet")); assert.ok(MODEL_ALIASES.has("haiku")); }); }); +// =========================================================================== +// resolveDefaultModel / resolveDefaultEffort +// =========================================================================== + +describe("resolveDefaultModel", () => { + it("returns 'opus' when model is null/undefined/empty", () => { + assert.equal(resolveDefaultModel(null), "opus"); + assert.equal(resolveDefaultModel(undefined), "opus"); + assert.equal(resolveDefaultModel(""), "opus"); + assert.equal(resolveDefaultModel(" "), "opus"); + }); + + it("passes through an explicit model", () => { + assert.equal(resolveDefaultModel("sonnet"), "sonnet"); + assert.equal(resolveDefaultModel("haiku"), "haiku"); + assert.equal(resolveDefaultModel("claude-opus-4-7"), "claude-opus-4-7"); + }); + + it("exposes DEFAULT_MODEL constant as 'opus'", () => { + assert.equal(DEFAULT_MODEL, "opus"); + }); +}); + +describe("resolveDefaultEffort", () => { + it("defaults to xhigh for opus alias and resolved id (including 1M variant)", () => { + assert.equal(resolveDefaultEffort("opus", null), "xhigh"); + assert.equal(resolveDefaultEffort("claude-opus-4-7", null), "xhigh"); + assert.equal(resolveDefaultEffort("claude-opus-4-7[1m]", null), "xhigh"); + assert.equal(resolveDefaultEffort("OPUS", undefined), "xhigh"); + }); + + it("defaults to high for sonnet alias and resolved id (including 1M variant)", () => { + assert.equal(resolveDefaultEffort("sonnet", null), "high"); + assert.equal(resolveDefaultEffort("claude-sonnet-4-6", null), "high"); + assert.equal(resolveDefaultEffort("claude-sonnet-4-6[1m]", null), "high"); + }); + + it("returns undefined for haiku (no effort default)", () => { + assert.equal(resolveDefaultEffort("haiku", null), undefined); + assert.equal(resolveDefaultEffort("claude-haiku-4-5", undefined), undefined); + }); + + it("returns undefined for unknown model when effort not provided", () => { + assert.equal(resolveDefaultEffort("some-future-model", null), undefined); + }); + + it("preserves an explicit effort regardless of model", () => { + assert.equal(resolveDefaultEffort("opus", "low"), "low"); + assert.equal(resolveDefaultEffort("sonnet", "medium"), "medium"); + assert.equal(resolveDefaultEffort("haiku", "high"), "high"); + assert.equal(resolveDefaultEffort(null, "max"), "max"); + }); + + it("treats blank effort as missing", () => { + assert.equal(resolveDefaultEffort("opus", ""), "xhigh"); + assert.equal(resolveDefaultEffort("opus", " "), "xhigh"); + }); + + it("DEFAULT_EFFORT_BY_MODEL contains the expected entries", () => { + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("opus"), "xhigh"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("claude-opus-4-7"), "xhigh"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("claude-opus-4-7[1m]"), "xhigh"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("sonnet"), "high"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("claude-sonnet-4-6"), "high"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("claude-sonnet-4-6[1m]"), "high"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.has("haiku"), false); + }); +}); + // =========================================================================== // resolveEffort // =========================================================================== @@ -476,8 +555,8 @@ describe("resolveEffort", () => { assert.equal(resolveEffort("high"), "high"); }); - it("maps 'xhigh' to 'max'", () => { - assert.equal(resolveEffort("xhigh"), "max"); + it("passes 'xhigh' through as a first-class effort", () => { + assert.equal(resolveEffort("xhigh"), "xhigh"); }); it("maps 'max' to 'max'", () => { @@ -486,6 +565,7 @@ describe("resolveEffort", () => { it("normalizes canonical effort values to lowercase", () => { assert.equal(resolveEffort("HIGH"), "high"); + assert.equal(resolveEffort("XHIGH"), "xhigh"); }); it("throws on unsupported effort values", () => { @@ -500,19 +580,19 @@ describe("resolveEffort", () => { assert.equal(resolveEffort(undefined), undefined); }); - it("VALID_EFFORTS contains low, medium, high, max", () => { + it("VALID_EFFORTS contains low, medium, high, xhigh, max", () => { assert.ok(VALID_EFFORTS.has("low")); assert.ok(VALID_EFFORTS.has("medium")); assert.ok(VALID_EFFORTS.has("high")); + assert.ok(VALID_EFFORTS.has("xhigh")); assert.ok(VALID_EFFORTS.has("max")); - assert.equal(VALID_EFFORTS.size, 4); + assert.equal(VALID_EFFORTS.size, 5); }); it("EFFORT_ALIASES only contains legacy compatibility mappings", () => { assert.deepEqual(EFFORT_ALIASES, { none: "low", minimal: "low", - xhigh: "max", }); }); }); @@ -564,13 +644,20 @@ describe("buildArgs", () => { const args = buildArgs("p", { model: "sonnet" }); const idx = args.indexOf("--model"); assert.ok(idx >= 0); - assert.equal(args[idx + 1], "claude-sonnet-4-6"); + assert.equal(args[idx + 1], "claude-sonnet-4-6[1m]"); }); it("includes --effort with resolved effort", () => { const args = buildArgs("p", { effort: "xhigh" }); const idx = args.indexOf("--effort"); assert.ok(idx >= 0); + assert.equal(args[idx + 1], "xhigh"); + }); + + it("passes 'max' through as --effort max when explicitly requested", () => { + const args = buildArgs("p", { effort: "max" }); + const idx = args.indexOf("--effort"); + assert.ok(idx >= 0); assert.equal(args[idx + 1], "max"); }); @@ -635,6 +722,82 @@ describe("buildArgs", () => { }); }); +// =========================================================================== +// buildClaudeSpawnEnv +// =========================================================================== + +describe("buildClaudeSpawnEnv", () => { + it("removes an inherited localhost Anthropic base URL when settings no longer define it", () => { + const env = buildClaudeSpawnEnv( + { + ANTHROPIC_BASE_URL: "http://127.0.0.1:55453", + CLAUDE_CODE_EFFORT_LEVEL: "xhigh", + }, + { configuredEnv: { CLAUDE_CODE_EFFORT_LEVEL: "xhigh" } } + ); + + assert.equal(env.ANTHROPIC_BASE_URL, undefined); + assert.equal(env.CLAUDE_CODE_EFFORT_LEVEL, "xhigh"); + }); + + it("keeps an explicitly configured localhost base URL", () => { + const env = buildClaudeSpawnEnv( + { ANTHROPIC_BASE_URL: "http://127.0.0.1:55453" }, + { configuredEnv: { ANTHROPIC_BASE_URL: "http://127.0.0.1:55453" } } + ); + + assert.equal(env.ANTHROPIC_BASE_URL, "http://127.0.0.1:55453"); + }); + + it("keeps non-local remote overrides even when they are not in settings", () => { + const env = buildClaudeSpawnEnv( + { ANTHROPIC_BASE_URL: "https://gateway.example.com" }, + { configuredEnv: {} } + ); + + assert.equal(env.ANTHROPIC_BASE_URL, "https://gateway.example.com"); + }); + + it("removes stale localhost proxy env vars too", () => { + const env = buildClaudeSpawnEnv( + { + ALL_PROXY: "http://localhost:8083", + HTTPS_PROXY: "http://localhost:8080", + HTTP_PROXY: "http://127.0.0.1:8787", + https_proxy: "http://localhost:8081", + http_proxy: "http://127.0.0.1:8788", + all_proxy: "http://localhost:8082", + }, + { configuredEnv: {} } + ); + + assert.equal(env.ALL_PROXY, undefined); + assert.equal(env.HTTPS_PROXY, undefined); + assert.equal(env.HTTP_PROXY, undefined); + assert.equal(env.https_proxy, undefined); + assert.equal(env.http_proxy, undefined); + assert.equal(env.all_proxy, undefined); + }); + + it("removes IPv6 loopback overrides too", () => { + const env = buildClaudeSpawnEnv( + { ANTHROPIC_BASE_URL: "http://[::1]:55453" }, + { configuredEnv: {} } + ); + + assert.equal(env.ANTHROPIC_BASE_URL, undefined); + }); + + it("preserves inherited env when Claude settings are unreadable", () => { + const env = buildClaudeSpawnEnv( + { ANTHROPIC_BASE_URL: "http://127.0.0.1:55453" }, + { configuredEnv: null } + ); + + assert.equal(env.ANTHROPIC_BASE_URL, "http://127.0.0.1:55453"); + }); +}); + // =========================================================================== // SANDBOX_READ_ONLY_TOOLS constant // =========================================================================== From 105b8cda5506725257e81d634506ef46ad51bae9 Mon Sep 17 00:00:00 2001 From: jinku Date: Mon, 11 May 2026 15:07:25 -0700 Subject: [PATCH 2/4] feat(review): isolate review/adversarial-review in ephemeral worktree + git MCP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the previous Bash-pattern allowlist (which the Claude CLI does not strictly enforce — once Bash appears with any sub-pattern, the entire Bash tool opens up) with a tighter design built around three layers: 1. A bundled read-only git MCP server (`scripts/lib/mcp-git.mjs`) exposing `diff`, `log`, `show`, `blame`, `status`, `grep`, `ls_files`. Argv is constructed inside the server with spawnSync (no shell), refs and paths are validated against tight character classes, and pathspecs cannot escape the configured git root. 2. An ephemeral `git worktree` created per review run, with cleanup in a nested try/finally so every setup failure path still tears it down. A stale-worktree sweeper runs at the start of each review. 3. An updated `SANDBOX_REVIEW_TOOLS` allowlist that drops Bash entirely and adds only `mcp__gitReview__*` plus Read/Glob/Grep/Web. The read-only sandbox preset stops blocking network so WebFetch/WebSearch and the Claude CLI's own API path keep working. `runClaudeReview` and `runClaudeAdversarialReview` now construct the worktree, spawn the MCP server via a generated `--mcp-config`, run Claude inside the worktree, and clean everything up on the way out. Tests cover the MCP request dispatch, every tool handler (including injection rejection), the worktree lifecycle, the new allowlist contract, and the new buildArgs flags (`--mcp-config`, `--strict-mcp-config`). Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/claude-companion.mjs | 90 ++++-- scripts/lib/claude-cli.mjs | 120 +++++++- scripts/lib/mcp-git.mjs | 515 ++++++++++++++++++++++++++++++++ scripts/lib/review-worktree.mjs | 147 +++++++++ tests/claude-cli.test.mjs | 7 +- tests/mcp-git.test.mjs | 294 ++++++++++++++++++ tests/review-worktree.test.mjs | 171 +++++++++++ tests/sandbox-modes.test.mjs | 118 +++++++- 8 files changed, 1426 insertions(+), 36 deletions(-) create mode 100644 scripts/lib/mcp-git.mjs create mode 100644 scripts/lib/review-worktree.mjs create mode 100644 tests/mcp-git.test.mjs create mode 100644 tests/review-worktree.test.mjs diff --git a/scripts/claude-companion.mjs b/scripts/claude-companion.mjs index e34fa25..825d4f2 100644 --- a/scripts/claude-companion.mjs +++ b/scripts/claude-companion.mjs @@ -44,7 +44,13 @@ import { SANDBOX_READ_ONLY_TOOLS, createSandboxSettings, cleanupSandboxSettings, + createReviewMcpConfig, + cleanupReviewMcpConfig, } from "./lib/claude-cli.mjs"; +import { + createReviewWorktree, + pruneStaleReviewWorktrees, +} from "./lib/review-worktree.mjs"; import { readStdinIfPiped } from "./lib/fs.mjs"; import { collectReviewContext, @@ -466,6 +472,9 @@ async function executeReviewRun(request) { ensureClaudeReady(request.cwd); ensureGitRepository(request.cwd); + // Sweep dead worktrees from previous crashed runs before allocating a new one. + try { pruneStaleReviewWorktrees(request.cwd); } catch {} + const target = resolveReviewTarget(request.cwd, { base: request.base, scope: request.scope @@ -474,20 +483,32 @@ async function executeReviewRun(request) { const reviewName = request.reviewName ?? "Review"; if (reviewName === "Review") { - // Standard review via Claude CLI — read-only sandbox + // Standard review via Claude CLI — read-only sandbox + ephemeral worktree. const context = collectReviewContext(request.cwd, target); const prompt = buildReviewPrompt(context); - const sandboxSettingsFile = createSandboxSettings("read-only"); let result; + const sandboxSettingsFile = createSandboxSettings("read-only"); try { - result = await runClaudeReview(request.cwd, prompt, { - model: request.model, - effort: request.effort, - onProgress: request.onProgress, - onSpawn: request.onSpawn, - permissionMode: "dontAsk", - settingsFile: sandboxSettingsFile, - }); + const worktree = createReviewWorktree(request.cwd, { label: "review" }); + try { + const mcpConfigFile = createReviewMcpConfig(worktree.path); + try { + result = await runClaudeReview(worktree.path, prompt, { + model: request.model, + effort: request.effort, + onProgress: request.onProgress, + onSpawn: request.onSpawn, + permissionMode: "dontAsk", + settingsFile: sandboxSettingsFile, + mcpConfigFile, + strictMcpConfig: true, + }); + } finally { + cleanupReviewMcpConfig(mcpConfigFile); + } + } finally { + worktree.cleanup(); + } } finally { cleanupSandboxSettings(sandboxSettingsFile); } @@ -528,26 +549,38 @@ async function executeReviewRun(request) { }; } - // Adversarial review with structured output — read-only sandbox + // Adversarial review with structured output — read-only sandbox + ephemeral worktree. const context = collectReviewContext(request.cwd, target); const prompt = buildAdversarialReviewPrompt(context, focusText); const schema = readOutputSchema(REVIEW_SCHEMA_PATH); - const sandboxSettingsFile = createSandboxSettings("read-only"); let result; + const sandboxSettingsFile = createSandboxSettings("read-only"); try { - result = await runClaudeAdversarialReview( - context.repoRoot, - prompt, - schema, - { - model: request.model, - effort: request.effort, - onProgress: request.onProgress, - onSpawn: request.onSpawn, - permissionMode: "dontAsk", - settingsFile: sandboxSettingsFile, + const worktree = createReviewWorktree(context.repoRoot, { label: "adversarial-review" }); + try { + const mcpConfigFile = createReviewMcpConfig(worktree.path); + try { + result = await runClaudeAdversarialReview( + worktree.path, + prompt, + schema, + { + model: request.model, + effort: request.effort, + onProgress: request.onProgress, + onSpawn: request.onSpawn, + permissionMode: "dontAsk", + settingsFile: sandboxSettingsFile, + mcpConfigFile, + strictMcpConfig: true, + } + ); + } finally { + cleanupReviewMcpConfig(mcpConfigFile); } - ); + } finally { + worktree.cleanup(); + } } finally { cleanupSandboxSettings(sandboxSettingsFile); } @@ -1834,11 +1867,20 @@ async function main() { case "cancel": await handleCancel(argv); break; + case "mcp-git": + await handleMcpGit(argv); + break; default: throw new Error(`Unknown subcommand: ${subcommand}`); } } +async function handleMcpGit(_argv) { + const { runMcpGitServer } = await import("./lib/mcp-git.mjs"); + const exitCode = await runMcpGitServer(); + process.exit(exitCode ?? 0); +} + main().catch((error) => { const message = error instanceof Error ? error.message : String(error); process.stderr.write(`${message}\n`); diff --git a/scripts/lib/claude-cli.mjs b/scripts/lib/claude-cli.mjs index 770079a..34f5713 100644 --- a/scripts/lib/claude-cli.mjs +++ b/scripts/lib/claude-cli.mjs @@ -12,6 +12,7 @@ import { randomBytes } from "node:crypto"; import fs from "node:fs"; import os from "node:os"; import path from "node:path"; +import { fileURLToPath } from "node:url"; import { normalizePathSlashes, resolvePluginRuntimeRoot } from "./codex-paths.mjs"; import { getProcessIdentity, validateProcessIdentity } from "./process.mjs"; @@ -434,6 +435,41 @@ export const SANDBOX_READ_ONLY_TOOLS = [ "Agent(explore,plan)", ]; +/** + * MCP server name used for the bundled read-only git MCP server. Exposes tools as + * `mcp____` (see scripts/lib/mcp-git.mjs for the catalog). + */ +export const REVIEW_MCP_SERVER_NAME = "gitReview"; + +export const REVIEW_MCP_TOOL_NAMES = [ + "diff", + "log", + "show", + "blame", + "status", + "grep", + "ls_files", +]; + +export const REVIEW_MCP_ALLOWED_TOOLS = REVIEW_MCP_TOOL_NAMES.map( + (name) => `mcp__${REVIEW_MCP_SERVER_NAME}__${name}` +); + +/** + * Tools exposed to review/adversarial-review runs. Bash is intentionally absent — + * the Claude CLI does not strictly enforce `Bash(:*)` sub-patterns, so any + * Bash entry would open the full Bash surface. Git operations are surfaced through + * the bundled read-only git MCP server instead (`REVIEW_MCP_ALLOWED_TOOLS`). + */ +export const SANDBOX_REVIEW_TOOLS = [ + "Read", + "Glob", + "Grep", + "WebSearch", + "WebFetch", + ...REVIEW_MCP_ALLOWED_TOOLS, +]; + // --------------------------------------------------------------------------- // Sandbox Settings — OS-level isolation via Claude Code's sandbox feature. // Written to a temp file and passed via --settings. @@ -442,7 +478,10 @@ export const SANDBOX_READ_ONLY_TOOLS = [ /** * Sandbox presets matching Codex sandbox modes. * - * read-only: no writes at all, no network from Bash. + * read-only: no file writes outside the OS temp dir. Network is allowed so + * that `WebFetch`, `WebSearch`, and the Claude CLI's API path keep + * working; the review allowlist excludes Bash entirely, so there + * is no shell surface to exfiltrate or mutate state through. * workspace-write: Bash can write to cwd + OS temp dir only, no network from Bash. * All tools allowed (no allowedTools restriction). */ @@ -450,13 +489,12 @@ export const SANDBOX_SETTINGS = { "read-only": { sandbox: { enabled: true, - autoAllowBashIfSandboxed: true, + // No Bash in the review allowlist, but keep this flag conservative so that + // any sandbox-aware tool still has to opt in explicitly. + autoAllowBashIfSandboxed: false, filesystem: { allowWrite: [SANDBOX_TEMP_DIR], }, - network: { - allowedDomains: [], - }, }, }, "workspace-write": { @@ -500,6 +538,65 @@ export function cleanupSandboxSettings(filePath) { } } +// --------------------------------------------------------------------------- +// Review MCP config +// --------------------------------------------------------------------------- + +/** + * Resolve the absolute path of the bundled claude-companion.mjs script so the + * `mcp-git` subcommand can be invoked from any cwd. Uses `fileURLToPath` so the + * resolution works on Windows where `new URL(...).pathname` is not a usable + * filesystem path. + */ +function resolveCompanionScriptPath() { + return path.resolve( + path.dirname(fileURLToPath(import.meta.url)), + "..", + "claude-companion.mjs" + ); +} + +/** + * Write an `--mcp-config` JSON file that registers the bundled read-only git + * MCP server. The server's CC_GIT_ROOT env var is set to `gitRoot` so that its + * tool handlers operate strictly inside the review worktree. + */ +export function createReviewMcpConfig(gitRoot) { + if (!gitRoot || typeof gitRoot !== "string") { + throw new Error("createReviewMcpConfig: gitRoot is required"); + } + const companionScript = resolveCompanionScriptPath(); + const config = { + mcpServers: { + [REVIEW_MCP_SERVER_NAME]: { + command: process.execPath, + args: [companionScript, "mcp-git"], + env: { + CC_GIT_ROOT: gitRoot, + }, + }, + }, + }; + + const dir = path.join(resolvePluginRuntimeRoot(), "mcp"); + fs.mkdirSync(dir, { recursive: true, mode: 0o700 }); + const tmpFile = path.join( + dir, + `cc-mcp-${process.pid}-${Date.now().toString(36)}-${randomBytes(6).toString("hex")}.json` + ); + fs.writeFileSync(tmpFile, JSON.stringify(config), { + encoding: "utf8", + mode: 0o600, + }); + return tmpFile; +} + +export function cleanupReviewMcpConfig(filePath) { + if (filePath) { + try { fs.unlinkSync(filePath); } catch {} + } +} + // --------------------------------------------------------------------------- // Model & Effort Mapping // --------------------------------------------------------------------------- @@ -619,6 +716,12 @@ export function buildArgs(prompt, options = {}) { if (options.settingsFile) { args.push("--settings", options.settingsFile); } + if (options.mcpConfigFile) { + args.push("--mcp-config", options.mcpConfigFile); + } + if (options.strictMcpConfig) { + args.push("--strict-mcp-config"); + } args.push("--", prompt); return args; @@ -722,12 +825,17 @@ export async function runClaudeTurn(cwd, prompt, options = {}) { /** * Execute a review (non-streaming, no session persistence). + * + * The default allowlist is `SANDBOX_REVIEW_TOOLS` (Read/Glob/Grep/Web + the git + * MCP tool surface). Callers that want to run with an alternative allowlist — + * e.g., legacy `SANDBOX_READ_ONLY_TOOLS` for back-compat — can override via + * `options.allowedTools`. Bash is intentionally excluded by default. */ export async function runClaudeReview(cwd, prompt, options = {}) { // Use streaming mode (same as runClaudeTurn) for progress reporting const result = await runClaudeTurn(cwd, prompt, { noSessionPersistence: true, - allowedTools: SANDBOX_READ_ONLY_TOOLS, + allowedTools: SANDBOX_REVIEW_TOOLS, ...options, }); diff --git a/scripts/lib/mcp-git.mjs b/scripts/lib/mcp-git.mjs new file mode 100644 index 0000000..bf7b98c --- /dev/null +++ b/scripts/lib/mcp-git.mjs @@ -0,0 +1,515 @@ +/** + * Copyright 2026 Sendbird, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ +/** + * Read-only git MCP server over stdio. + * + * Exposes a small surface of git read-only operations as structured MCP tools so + * that review and adversarial-review runs can inspect commits, diffs, and history + * without exposing the Bash tool. Tool calls land here, argv is constructed inside + * the server (no shell), and the working directory is locked to CC_GIT_ROOT. + */ + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +export const MCP_PROTOCOL_VERSION = "2024-11-05"; +export const MCP_SERVER_NAME = "cc-plugin-codex-git"; +export const MCP_SERVER_VERSION = "1.0.0"; +export const MAX_OUTPUT_BYTES = 64 * 1024 * 1024; +export const MAX_STDIN_BUFFER_BYTES = 1 * 1024 * 1024; + +// Refs may contain letters, digits, and the small set of characters git uses for +// range/relative syntax. We deliberately reject shell metacharacters and spaces +// even though spawnSync does not invoke a shell — keeping the surface narrow makes +// the invariant easy to audit. +const REF_PATTERN = /^[A-Za-z0-9._/\-~^@]{1,200}(?:\.\.\.?[A-Za-z0-9._/\-~^@]{1,200})?$/; + +// Pathspecs ride directly to git after the `--` separator so they cannot become +// flags, but we still gate them: no shell metacharacters, no NUL, no leading `-`, +// no parent traversal, and (when absolute) the resolved location must stay inside +// CC_GIT_ROOT. +const PATHSPEC_FORBIDDEN = /[\0\n\r;&|`$<>*?(){}\[\]"'\\]/; + +const LIMIT_DEFAULTS = { + log: 20, + blame: 0, + grep: 200, +}; + +function isPlainObject(value) { + return value !== null && typeof value === "object" && !Array.isArray(value); +} + +function ensureValidRef(ref, label = "ref") { + if (typeof ref !== "string" || !ref) { + throw new Error(`${label} must be a non-empty string`); + } + if (!REF_PATTERN.test(ref)) { + throw new Error(`${label} contains characters that are not allowed`); + } + return ref; +} + +function ensureValidPath(spec, root) { + if (typeof spec !== "string" || !spec) { + throw new Error("path must be a non-empty string"); + } + if (PATHSPEC_FORBIDDEN.test(spec)) { + throw new Error(`path contains forbidden characters: ${spec}`); + } + if (spec.startsWith("-")) { + throw new Error(`path may not start with '-' (interpreted as flag): ${spec}`); + } + const resolved = path.resolve(root, spec); + const rootResolved = path.resolve(root); + const rel = path.relative(rootResolved, resolved); + if (rel === "" || rel === ".") { + return spec; + } + if (rel.startsWith("..") || path.isAbsolute(rel)) { + throw new Error(`path escapes the git root: ${spec}`); + } + return spec; +} + +function ensureValidPathList(paths, root) { + if (paths == null) return []; + if (!Array.isArray(paths)) { + throw new Error("paths must be an array of strings"); + } + if (paths.length > 64) { + throw new Error("too many paths (max 64)"); + } + return paths.map((p) => ensureValidPath(p, root)); +} + +function ensureBoundedInt(value, { min, max, label }) { + if (value == null) return null; + const num = typeof value === "number" ? value : Number.parseInt(String(value), 10); + if (!Number.isFinite(num) || !Number.isInteger(num)) { + throw new Error(`${label} must be an integer`); + } + if (num < min || num > max) { + throw new Error(`${label} must be between ${min} and ${max}`); + } + return num; +} + +function ensureValidBlameRange(range) { + if (range == null) return null; + if (typeof range !== "string") { + throw new Error("range must be a string like '12,40'"); + } + if (!/^\d{1,7}(?:,\d{1,7})?$/.test(range)) { + throw new Error("range must look like '12' or '12,40'"); + } + return range; +} + +function runGit(root, args) { + const result = spawnSync("git", args, { + cwd: root, + encoding: "utf8", + maxBuffer: MAX_OUTPUT_BYTES, + env: { + ...process.env, + // Disable git config that could side-effect (no signing, no pagers, no + // hooks running through git commands we wrap). Use POSIX `:` and `cat` + // builtins that work the same on Linux, macOS, and git-for-Windows. + GIT_PAGER: "cat", + GIT_EDITOR: ":", + GIT_TERMINAL_PROMPT: "0", + }, + timeout: 30_000, + }); + return { + stdout: result.stdout ?? "", + stderr: result.stderr ?? "", + exitCode: result.status, + signal: result.signal ?? null, + error: result.error?.message ?? null, + }; +} + +// --------------------------------------------------------------------------- +// Tool registry +// --------------------------------------------------------------------------- + +export const TOOL_DEFINITIONS = [ + { + name: "diff", + description: + "Show a git diff inside the review worktree. Optional `refs` (e.g., 'HEAD~3..HEAD' or 'origin/main...HEAD') and optional `paths`. Use `stat: true` for a stat-only summary.", + inputSchema: { + type: "object", + additionalProperties: false, + properties: { + refs: { type: "string", description: "Git ref or ref range." }, + paths: { + type: "array", + items: { type: "string" }, + description: "Pathspecs relative to the review worktree.", + }, + stat: { type: "boolean", description: "Return --stat summary instead of full diff." }, + head: { + type: "integer", + minimum: 1, + maximum: 5000, + description: "Truncate output to the first N lines.", + }, + }, + }, + }, + { + name: "log", + description: + "List recent commits. Optional `refs`, `paths`, `limit` (default 20), `format` ('oneline' | 'short' | 'full').", + inputSchema: { + type: "object", + additionalProperties: false, + properties: { + refs: { type: "string" }, + paths: { type: "array", items: { type: "string" } }, + limit: { type: "integer", minimum: 1, maximum: 500 }, + format: { type: "string", enum: ["oneline", "short", "medium", "full"] }, + }, + }, + }, + { + name: "show", + description: "Show a single commit (`ref` required). Optional `paths` to restrict to specific files. Optional `stat: true` for stat-only.", + inputSchema: { + type: "object", + additionalProperties: false, + required: ["ref"], + properties: { + ref: { type: "string" }, + paths: { type: "array", items: { type: "string" } }, + stat: { type: "boolean" }, + }, + }, + }, + { + name: "blame", + description: "Show line-by-line authorship of a file. `path` required. Optional `range` ('12,40').", + inputSchema: { + type: "object", + additionalProperties: false, + required: ["path"], + properties: { + path: { type: "string" }, + range: { type: "string", description: "Line range like '12,40' or '12'." }, + }, + }, + }, + { + name: "status", + description: "Show working-tree status. Use `porcelain: true` for machine-readable output.", + inputSchema: { + type: "object", + additionalProperties: false, + properties: { + porcelain: { type: "boolean" }, + }, + }, + }, + { + name: "grep", + description: "Search the tracked tree with `git grep`. `pattern` required. Optional `paths`, `limit` (default 200 lines).", + inputSchema: { + type: "object", + additionalProperties: false, + required: ["pattern"], + properties: { + pattern: { type: "string" }, + paths: { type: "array", items: { type: "string" } }, + limit: { type: "integer", minimum: 1, maximum: 5000 }, + ignoreCase: { type: "boolean" }, + word: { type: "boolean", description: "Match whole words only." }, + }, + }, + }, + { + name: "ls_files", + description: "List tracked files. Optional `paths` to filter.", + inputSchema: { + type: "object", + additionalProperties: false, + properties: { + paths: { type: "array", items: { type: "string" } }, + }, + }, + }, +]; + +// --------------------------------------------------------------------------- +// Tool handlers +// --------------------------------------------------------------------------- + +function truncateOutput(text, maxLines) { + if (!maxLines || maxLines <= 0) return text; + const lines = text.split("\n"); + if (lines.length <= maxLines) return text; + return [ + ...lines.slice(0, maxLines), + `... (truncated; ${lines.length - maxLines} more line(s))`, + ].join("\n"); +} + +function formatResult(result, options = {}) { + const { truncateLines, label } = options; + let text = result.stdout || ""; + if (truncateLines && text) { + text = truncateOutput(text, truncateLines); + } + if (!text && result.stderr) { + text = `[git ${label ?? "command"} stderr]\n${result.stderr}`; + } + if (result.error) { + text = `${text}\n[error: ${result.error}]`.trim(); + } + const isError = + Boolean(result.error) || + (result.exitCode != null && result.exitCode !== 0 && !result.stdout); + return { text, isError }; +} + +const HANDLERS = { + diff(args, root) { + const cmd = ["diff", "--no-color"]; + if (args?.stat === true) cmd.push("--stat"); + if (args?.refs != null) cmd.push(ensureValidRef(args.refs, "refs")); + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + const result = runGit(root, cmd); + const headLimit = ensureBoundedInt(args?.head, { min: 1, max: 5000, label: "head" }); + return formatResult(result, { truncateLines: headLimit, label: "diff" }); + }, + + log(args, root) { + const cmd = ["log", "--no-color"]; + const limit = ensureBoundedInt(args?.limit, { min: 1, max: 500, label: "limit" }) ?? LIMIT_DEFAULTS.log; + cmd.push(`-n`, String(limit)); + const format = args?.format; + if (format === "oneline" || format == null) { + cmd.push("--oneline"); + } else if (format === "short" || format === "medium" || format === "full") { + cmd.push(`--pretty=${format}`); + } else { + throw new Error(`format must be one of: oneline, short, medium, full`); + } + if (args?.refs != null) cmd.push(ensureValidRef(args.refs, "refs")); + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + return formatResult(runGit(root, cmd), { label: "log" }); + }, + + show(args, root) { + const cmd = ["show", "--no-color"]; + if (args?.stat === true) cmd.push("--stat"); + cmd.push(ensureValidRef(args?.ref, "ref")); + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + return formatResult(runGit(root, cmd), { label: "show" }); + }, + + blame(args, root) { + // `git blame` does not accept `--no-color`; pass color config via -c instead. + const cmd = ["-c", "color.ui=false", "blame"]; + const range = ensureValidBlameRange(args?.range); + if (range) cmd.push("-L", range); + cmd.push(ensureValidPath(args?.path, root)); + return formatResult(runGit(root, cmd), { label: "blame" }); + }, + + status(args, root) { + const cmd = ["status"]; + if (args?.porcelain === true) cmd.push("--porcelain"); + return formatResult(runGit(root, cmd), { label: "status" }); + }, + + grep(args, root) { + const cmd = ["grep", "-n", "--no-color"]; + if (args?.ignoreCase === true) cmd.push("-i"); + if (args?.word === true) cmd.push("-w"); + const pattern = args?.pattern; + if (typeof pattern !== "string" || !pattern) { + throw new Error("pattern must be a non-empty string"); + } + if (pattern.length > 1024) { + throw new Error("pattern too long (max 1024 chars)"); + } + if (/[\0\n\r]/.test(pattern)) { + throw new Error("pattern contains NUL/newline characters"); + } + cmd.push("-e", pattern); + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + const limit = ensureBoundedInt(args?.limit, { min: 1, max: 5000, label: "limit" }) ?? LIMIT_DEFAULTS.grep; + return formatResult(runGit(root, cmd), { truncateLines: limit, label: "grep" }); + }, + + ls_files(args, root) { + const cmd = ["ls-files"]; + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + return formatResult(runGit(root, cmd), { label: "ls-files" }); + }, +}; + +// --------------------------------------------------------------------------- +// JSON-RPC dispatch +// --------------------------------------------------------------------------- + +export function handleMcpRequest(request, root) { + if (!isPlainObject(request)) { + return { + jsonrpc: "2.0", + id: null, + error: { code: -32600, message: "Invalid Request" }, + }; + } + const { method, id, params } = request; + const respondError = (code, message) => ({ + jsonrpc: "2.0", + id: id ?? null, + error: { code, message }, + }); + + if (method === "initialize") { + return { + jsonrpc: "2.0", + id, + result: { + protocolVersion: MCP_PROTOCOL_VERSION, + capabilities: { tools: {} }, + serverInfo: { + name: MCP_SERVER_NAME, + version: MCP_SERVER_VERSION, + }, + }, + }; + } + + if (method === "initialized" || method === "notifications/initialized") { + return null; + } + + if (method === "ping") { + return { jsonrpc: "2.0", id, result: {} }; + } + + if (method === "tools/list") { + return { + jsonrpc: "2.0", + id, + result: { tools: TOOL_DEFINITIONS }, + }; + } + + if (method === "tools/call") { + const name = params?.name; + const args = params?.arguments ?? {}; + const handler = HANDLERS[name]; + if (!handler) { + return respondError(-32601, `Unknown tool: ${name}`); + } + try { + const { text, isError } = handler(args, root); + return { + jsonrpc: "2.0", + id, + result: { + content: [{ type: "text", text: text || "(empty)" }], + isError: Boolean(isError), + }, + }; + } catch (err) { + return { + jsonrpc: "2.0", + id, + result: { + content: [{ type: "text", text: `Error: ${err.message}` }], + isError: true, + }, + }; + } + } + + return respondError(-32601, `Method not found: ${method}`); +} + +// --------------------------------------------------------------------------- +// Server entrypoint +// --------------------------------------------------------------------------- + +export async function runMcpGitServer({ + env = process.env, + stdin = process.stdin, + stdout = process.stdout, + stderr = process.stderr, +} = {}) { + const rawRoot = env.CC_GIT_ROOT; + if (!rawRoot) { + stderr.write("[mcp-git] CC_GIT_ROOT env var is required\n"); + return 2; + } + const root = path.resolve(rawRoot); + if (!fs.existsSync(root) || !fs.statSync(root).isDirectory()) { + stderr.write(`[mcp-git] CC_GIT_ROOT not found or not a directory: ${root}\n`); + return 2; + } + + let buffer = ""; + stdin.setEncoding("utf8"); + + const writeResponse = (response) => { + if (response == null) return; + stdout.write(`${JSON.stringify(response)}\n`); + }; + + return await new Promise((resolve) => { + stdin.on("data", (chunk) => { + buffer += chunk; + if (buffer.length > MAX_STDIN_BUFFER_BYTES) { + // A well-behaved MCP client sends newline-delimited JSON. If we have grown + // past the cap without a newline, drop the oversized prefix so memory is + // bounded — subsequent data may still contain valid frames. + const lastNewline = buffer.lastIndexOf("\n"); + if (lastNewline === -1) { + buffer = buffer.slice(-MAX_STDIN_BUFFER_BYTES); + } else { + buffer = buffer.slice(lastNewline + 1); + } + } + let newlineIndex; + while ((newlineIndex = buffer.indexOf("\n")) !== -1) { + const line = buffer.slice(0, newlineIndex).trim(); + buffer = buffer.slice(newlineIndex + 1); + if (!line) continue; + let request; + try { + request = JSON.parse(line); + } catch { + writeResponse({ + jsonrpc: "2.0", + id: null, + error: { code: -32700, message: "Parse error" }, + }); + continue; + } + const response = handleMcpRequest(request, root); + if (response && request.id !== undefined && request.id !== null) { + writeResponse(response); + } + } + }); + + stdin.on("end", () => { + resolve(0); + }); + }); +} diff --git a/scripts/lib/review-worktree.mjs b/scripts/lib/review-worktree.mjs new file mode 100644 index 0000000..e3e710d --- /dev/null +++ b/scripts/lib/review-worktree.mjs @@ -0,0 +1,147 @@ +/** + * Copyright 2026 Sendbird, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ +/** + * Ephemeral git worktree lifecycle for review/adversarial-review runs. + * + * A review is run inside a throwaway `git worktree` so that any side effect that + * slips past the allowlist (a stray `Bash` injection, a tool that writes outside + * `/tmp`, etc.) is contained and disappears on cleanup. The worktree shares the + * primary repo's object database, so creating one is cheap. + */ + +import { spawnSync } from "node:child_process"; +import { randomBytes } from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; +import { resolvePluginRuntimeRoot } from "./codex-paths.mjs"; + +const WORKTREE_DIR_NAME = "review-worktrees"; + +function runGit(cwd, args, options = {}) { + return spawnSync("git", args, { + cwd, + encoding: "utf8", + timeout: 30_000, + env: { + ...process.env, + GIT_TERMINAL_PROMPT: "0", + // Use git's own no-op editor so review never spawns a real editor. + // `:` is a builtin in both POSIX shells and git's exec resolution. + GIT_EDITOR: ":", + }, + ...options, + }); +} + +function resolveWorktreesRoot() { + const root = path.join(resolvePluginRuntimeRoot(), WORKTREE_DIR_NAME); + fs.mkdirSync(root, { recursive: true, mode: 0o700 }); + return root; +} + +function makeWorktreePath(label) { + const slug = label && /^[A-Za-z0-9._-]+$/.test(label) ? label : "review"; + return path.join( + resolveWorktreesRoot(), + `${slug}-${process.pid}-${Date.now().toString(36)}-${randomBytes(4).toString("hex")}` + ); +} + +/** + * Resolve the canonical git ref to materialize for a review. We default to HEAD + * (the branch tip); callers may pass a specific ref (branch review) or "HEAD" to + * be explicit. + */ +export function resolveBaseRef(repoRoot, requestedRef = "HEAD") { + const ref = requestedRef && /^[A-Za-z0-9._/\-~^@]+$/.test(requestedRef) + ? requestedRef + : "HEAD"; + const result = runGit(repoRoot, ["rev-parse", "--verify", `${ref}^{commit}`]); + if (result.status !== 0) { + throw new Error( + `git rev-parse ${ref} failed: ${(result.stderr ?? "").trim() || "unknown error"}` + ); + } + return result.stdout.trim(); +} + +/** + * Create an ephemeral worktree at the resolved ref. Returns `{ path, cleanup }`. + * The caller MUST invoke `cleanup()` (typically in a `finally` block) so that + * the worktree is removed even if the review fails. + */ +export function createReviewWorktree(repoRoot, { label = "review", ref = "HEAD" } = {}) { + const commit = resolveBaseRef(repoRoot, ref); + const worktreePath = makeWorktreePath(label); + + // `git worktree add --detach ` performs the checkout in one step; + // splitting it into add+checkout is unnecessary and slower. + const created = runGit(repoRoot, [ + "worktree", + "add", + "--detach", + worktreePath, + commit, + ]); + if (created.status !== 0) { + cleanupWorktreeDir(worktreePath); + throw new Error( + `git worktree add failed: ${(created.stderr ?? "").trim() || "unknown error"}` + ); + } + + let removed = false; + const cleanup = () => { + if (removed) return; + removed = true; + safeRemoveWorktree(repoRoot, worktreePath); + }; + + return { path: worktreePath, commit, cleanup }; +} + +function safeRemoveWorktree(repoRoot, worktreePath) { + // Best-effort: ask git to remove, then ensure the directory is gone. + runGit(repoRoot, ["worktree", "remove", "--force", worktreePath]); + cleanupWorktreeDir(worktreePath); + // `git worktree prune` cleans up the bookkeeping under .git/worktrees so that + // a future cleanup pass does not see a dangling entry pointing at a deleted dir. + runGit(repoRoot, ["worktree", "prune"]); +} + +function cleanupWorktreeDir(worktreePath) { + try { + fs.rmSync(worktreePath, { recursive: true, force: true, maxRetries: 3 }); + } catch { + // Last resort — leave on disk rather than crash the review pipeline. + } +} + +/** + * Sweep stale worktrees left behind by crashes. Called on every review start + * so that long-running plugin installs do not accumulate dead worktrees. + */ +export function pruneStaleReviewWorktrees(repoRoot, { maxAgeMs = 6 * 60 * 60 * 1000 } = {}) { + const root = resolveWorktreesRoot(); + let entries; + try { + entries = fs.readdirSync(root, { withFileTypes: true }); + } catch { + return; + } + const now = Date.now(); + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const full = path.join(root, entry.name); + let stat; + try { + stat = fs.statSync(full); + } catch { + continue; + } + if (now - stat.mtimeMs < maxAgeMs) continue; + safeRemoveWorktree(repoRoot, full); + } +} diff --git a/tests/claude-cli.test.mjs b/tests/claude-cli.test.mjs index 7ef679a..3cbc5b1 100644 --- a/tests/claude-cli.test.mjs +++ b/tests/claude-cli.test.mjs @@ -832,11 +832,14 @@ describe("SANDBOX_SETTINGS", () => { assert.ok("workspace-write" in SANDBOX_SETTINGS); }); - it("read-only enables sandbox with allowWrite [SANDBOX_TEMP_DIR]", () => { + it("read-only enables sandbox with allowWrite [SANDBOX_TEMP_DIR] and unrestricted network", () => { const s = SANDBOX_SETTINGS["read-only"].sandbox; assert.equal(s.enabled, true); assert.deepEqual(s.filesystem.allowWrite, [SANDBOX_TEMP_DIR]); - assert.deepEqual(s.network.allowedDomains, []); + // network is intentionally omitted so that WebFetch/WebSearch and Claude's + // own API path remain reachable; review safety comes from removing Bash + // from the allowlist instead. + assert.equal(s.network, undefined); }); it("workspace-write enables sandbox with allowWrite ['.', SANDBOX_TEMP_DIR]", () => { diff --git a/tests/mcp-git.test.mjs b/tests/mcp-git.test.mjs new file mode 100644 index 0000000..b7a3040 --- /dev/null +++ b/tests/mcp-git.test.mjs @@ -0,0 +1,294 @@ +/** + * Copyright 2026 Sendbird, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { + TOOL_DEFINITIONS, + MCP_PROTOCOL_VERSION, + MCP_SERVER_NAME, + handleMcpRequest, +} from "../scripts/lib/mcp-git.mjs"; + +// --------------------------------------------------------------------------- +// Fixture: a tiny git repo with two commits +// --------------------------------------------------------------------------- + +let repoRoot; + +function git(cwd, args) { + const result = spawnSync("git", args, { + cwd, + encoding: "utf8", + env: { + ...process.env, + GIT_AUTHOR_NAME: "Test", + GIT_AUTHOR_EMAIL: "test@example.com", + GIT_COMMITTER_NAME: "Test", + GIT_COMMITTER_EMAIL: "test@example.com", + GIT_TERMINAL_PROMPT: "0", + }, + }); + if (result.status !== 0) { + throw new Error( + `git ${args.join(" ")} failed: ${result.stderr ?? result.stdout ?? "unknown"}` + ); + } + return result; +} + +before(() => { + repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), "cc-mcp-git-test-")); + git(repoRoot, ["init", "-q", "-b", "main"]); + fs.writeFileSync(path.join(repoRoot, "hello.txt"), "hello\nworld\n"); + fs.writeFileSync(path.join(repoRoot, "guide.md"), "# Guide\nfirst line.\n"); + git(repoRoot, ["add", "."]); + git(repoRoot, ["commit", "-q", "-m", "initial commit"]); + fs.writeFileSync(path.join(repoRoot, "hello.txt"), "hello\nworld\nadded line\n"); + git(repoRoot, ["add", "."]); + git(repoRoot, ["commit", "-q", "-m", "tweak hello"]); +}); + +after(() => { + if (repoRoot) { + fs.rmSync(repoRoot, { recursive: true, force: true }); + } +}); + +// --------------------------------------------------------------------------- +// JSON-RPC dispatch +// --------------------------------------------------------------------------- + +describe("handleMcpRequest — initialize / tools/list", () => { + it("responds to initialize with server info + capabilities", () => { + const res = handleMcpRequest( + { jsonrpc: "2.0", id: 1, method: "initialize", params: {} }, + "/tmp" + ); + assert.equal(res.id, 1); + assert.equal(res.result.protocolVersion, MCP_PROTOCOL_VERSION); + assert.equal(res.result.serverInfo.name, MCP_SERVER_NAME); + assert.ok(res.result.capabilities.tools !== undefined); + }); + + it("lists all expected tools", () => { + const res = handleMcpRequest( + { jsonrpc: "2.0", id: 2, method: "tools/list" }, + "/tmp" + ); + const names = res.result.tools.map((t) => t.name).sort(); + assert.deepEqual(names, [ + "blame", + "diff", + "grep", + "log", + "ls_files", + "show", + "status", + ]); + for (const tool of res.result.tools) { + assert.equal(typeof tool.description, "string"); + assert.equal(tool.inputSchema.type, "object"); + } + }); + + it("returns method-not-found for unknown methods", () => { + const res = handleMcpRequest( + { jsonrpc: "2.0", id: 3, method: "wat/bogus" }, + "/tmp" + ); + assert.equal(res.error.code, -32601); + }); + + it("returns null (no response) for the initialized notification", () => { + const res = handleMcpRequest( + { jsonrpc: "2.0", method: "notifications/initialized" }, + "/tmp" + ); + assert.equal(res, null); + }); + + it("returns Invalid Request (-32600) when payload is not a JSON object", () => { + for (const bogus of [null, [], "string", 42, true]) { + const res = handleMcpRequest(bogus, "/tmp"); + assert.equal(res.error.code, -32600); + assert.equal(res.id, null); + } + }); +}); + +// --------------------------------------------------------------------------- +// Tool handlers (round-trip via handleMcpRequest) +// --------------------------------------------------------------------------- + +function callTool(name, args = {}) { + return handleMcpRequest( + { + jsonrpc: "2.0", + id: 99, + method: "tools/call", + params: { name, arguments: args }, + }, + repoRoot + ); +} + +describe("tool: status", () => { + it("returns clean working tree text", () => { + const res = callTool("status", {}); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /clean|nothing to commit/i); + }); + + it("returns porcelain output", () => { + fs.writeFileSync(path.join(repoRoot, "untracked.txt"), "x"); + try { + const res = callTool("status", { porcelain: true }); + assert.match(res.result.content[0].text, /\?\? untracked\.txt/); + } finally { + fs.unlinkSync(path.join(repoRoot, "untracked.txt")); + } + }); +}); + +describe("tool: log", () => { + it("returns oneline log limited by limit", () => { + const res = callTool("log", { limit: 1, format: "oneline" }); + assert.equal(res.result.isError, false); + const lines = res.result.content[0].text.trim().split("\n"); + assert.equal(lines.length, 1); + }); + + it("rejects unknown formats", () => { + const res = callTool("log", { format: "fancy" }); + assert.equal(res.result.isError, true); + assert.match(res.result.content[0].text, /format must be/); + }); + + it("rejects refs with shell metacharacters", () => { + const res = callTool("log", { refs: "HEAD; rm -rf /" }); + assert.equal(res.result.isError, true); + assert.match(res.result.content[0].text, /refs/); + }); + + it("accepts dotted ref ranges", () => { + const res = callTool("log", { refs: "HEAD~1..HEAD", limit: 5 }); + assert.equal(res.result.isError, false); + }); +}); + +describe("tool: diff", () => { + it("returns diff between HEAD~1 and HEAD", () => { + const res = callTool("diff", { refs: "HEAD~1..HEAD" }); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /\+added line/); + }); + + it("stat: true returns --stat summary", () => { + const res = callTool("diff", { refs: "HEAD~1..HEAD", stat: true }); + assert.match(res.result.content[0].text, /hello\.txt/); + assert.doesNotMatch(res.result.content[0].text, /\+added line/); + }); + + it("rejects paths that escape the git root", () => { + const res = callTool("diff", { paths: ["../escape"] }); + assert.equal(res.result.isError, true); + assert.match(res.result.content[0].text, /escape|forbidden/i); + }); + + it("rejects paths that look like flags", () => { + const res = callTool("diff", { paths: ["--exec=evil"] }); + assert.equal(res.result.isError, true); + assert.match(res.result.content[0].text, /flag/i); + }); + + it("rejects paths with shell metacharacters", () => { + const res = callTool("diff", { paths: ["a;rm"] }); + assert.equal(res.result.isError, true); + }); +}); + +describe("tool: show", () => { + it("requires ref", () => { + const res = callTool("show", {}); + assert.equal(res.result.isError, true); + assert.match(res.result.content[0].text, /ref/); + }); + + it("shows HEAD content with stat", () => { + const res = callTool("show", { ref: "HEAD", stat: true }); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /hello\.txt/); + }); +}); + +describe("tool: grep", () => { + it("finds matches across tracked files", () => { + const res = callTool("grep", { pattern: "world" }); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /hello\.txt:\d+:world/); + }); + + it("requires a non-empty pattern", () => { + const res = callTool("grep", { pattern: "" }); + assert.equal(res.result.isError, true); + }); + + it("rejects null bytes in pattern", () => { + const res = callTool("grep", { pattern: `a${String.fromCharCode(0)}b` }); + assert.equal(res.result.isError, true); + }); +}); + +describe("tool: blame", () => { + it("requires path", () => { + const res = callTool("blame", {}); + assert.equal(res.result.isError, true); + }); + + it("returns blame output", () => { + const res = callTool("blame", { path: "hello.txt" }); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /hello/); + }); + + it("accepts numeric range", () => { + const res = callTool("blame", { path: "hello.txt", range: "1,2" }); + assert.equal(res.result.isError, false); + }); + + it("rejects non-numeric range", () => { + const res = callTool("blame", { path: "hello.txt", range: "a,b" }); + assert.equal(res.result.isError, true); + }); +}); + +describe("tool: ls_files", () => { + it("lists tracked files in the worktree", () => { + const res = callTool("ls_files", {}); + assert.equal(res.result.isError, false); + const text = res.result.content[0].text; + assert.match(text, /hello\.txt/); + assert.match(text, /guide\.md/); + }); +}); + +// --------------------------------------------------------------------------- +// Tool registry shape +// --------------------------------------------------------------------------- + +describe("TOOL_DEFINITIONS", () => { + it("every tool has a name, description, and inputSchema", () => { + for (const t of TOOL_DEFINITIONS) { + assert.equal(typeof t.name, "string"); + assert.equal(typeof t.description, "string"); + assert.equal(t.inputSchema.type, "object"); + } + }); +}); diff --git a/tests/review-worktree.test.mjs b/tests/review-worktree.test.mjs new file mode 100644 index 0000000..2d0da30 --- /dev/null +++ b/tests/review-worktree.test.mjs @@ -0,0 +1,171 @@ +/** + * Copyright 2026 Sendbird, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { + createReviewWorktree, + pruneStaleReviewWorktrees, + resolveBaseRef, +} from "../scripts/lib/review-worktree.mjs"; + +let repoRoot; +let previousCodexHome; +let previousHome; +let previousUserProfile; +let tempHome; + +function git(cwd, args) { + const result = spawnSync("git", args, { + cwd, + encoding: "utf8", + env: { + ...process.env, + GIT_AUTHOR_NAME: "Test", + GIT_AUTHOR_EMAIL: "test@example.com", + GIT_COMMITTER_NAME: "Test", + GIT_COMMITTER_EMAIL: "test@example.com", + GIT_TERMINAL_PROMPT: "0", + }, + }); + if (result.status !== 0) { + throw new Error( + `git ${args.join(" ")} failed: ${result.stderr ?? result.stdout ?? "unknown"}` + ); + } + return result; +} + +before(() => { + // Sandbox the plugin runtime root so we don't touch the real ~/.codex. + tempHome = fs.mkdtempSync(path.join(os.tmpdir(), "cc-worktree-home-")); + previousHome = process.env.HOME; + previousUserProfile = process.env.USERPROFILE; + previousCodexHome = process.env.CODEX_HOME; + process.env.HOME = tempHome; + process.env.USERPROFILE = tempHome; + process.env.CODEX_HOME = path.join(tempHome, ".codex"); + + repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), "cc-worktree-repo-")); + git(repoRoot, ["init", "-q", "-b", "main"]); + fs.writeFileSync(path.join(repoRoot, "file.txt"), "content\n"); + git(repoRoot, ["add", "."]); + git(repoRoot, ["commit", "-q", "-m", "initial"]); +}); + +after(() => { + if (previousHome == null) delete process.env.HOME; + else process.env.HOME = previousHome; + if (previousUserProfile == null) delete process.env.USERPROFILE; + else process.env.USERPROFILE = previousUserProfile; + if (previousCodexHome == null) delete process.env.CODEX_HOME; + else process.env.CODEX_HOME = previousCodexHome; + if (tempHome) fs.rmSync(tempHome, { recursive: true, force: true }); + if (repoRoot) fs.rmSync(repoRoot, { recursive: true, force: true }); +}); + +// --------------------------------------------------------------------------- + +describe("resolveBaseRef", () => { + it("returns a 40-char SHA for HEAD", () => { + const sha = resolveBaseRef(repoRoot, "HEAD"); + assert.match(sha, /^[a-f0-9]{40}$/); + }); + + it("falls back to HEAD when the requested ref contains forbidden chars", () => { + const sha = resolveBaseRef(repoRoot, "HEAD; rm -rf /"); + assert.match(sha, /^[a-f0-9]{40}$/); + }); + + it("throws when the ref does not exist", () => { + assert.throws( + () => resolveBaseRef(repoRoot, "nonexistent-ref-xyz"), + /git rev-parse/ + ); + }); +}); + +describe("createReviewWorktree", () => { + it("creates a worktree at HEAD with checked-out files", () => { + const wt = createReviewWorktree(repoRoot, { label: "unit" }); + try { + assert.ok(fs.existsSync(wt.path)); + assert.ok(fs.existsSync(path.join(wt.path, "file.txt"))); + assert.match(wt.commit, /^[a-f0-9]{40}$/); + } finally { + wt.cleanup(); + } + }); + + it("cleanup removes the worktree directory", () => { + const wt = createReviewWorktree(repoRoot); + assert.ok(fs.existsSync(wt.path)); + wt.cleanup(); + assert.equal(fs.existsSync(wt.path), false); + }); + + it("cleanup is idempotent", () => { + const wt = createReviewWorktree(repoRoot); + wt.cleanup(); + wt.cleanup(); // should not throw + assert.equal(fs.existsSync(wt.path), false); + }); + + it("uses a sanitized slug derived from the label", () => { + const wt = createReviewWorktree(repoRoot, { label: "review" }); + try { + assert.match(path.basename(wt.path), /^review-/); + } finally { + wt.cleanup(); + } + }); + + it("falls back to a safe slug when label contains illegal characters", () => { + const wt = createReviewWorktree(repoRoot, { label: "rev iew/../bad" }); + try { + assert.match(path.basename(wt.path), /^review-/); + } finally { + wt.cleanup(); + } + }); + + it("each invocation creates a unique path", () => { + const a = createReviewWorktree(repoRoot, { label: "a" }); + const b = createReviewWorktree(repoRoot, { label: "a" }); + try { + assert.notEqual(a.path, b.path); + } finally { + a.cleanup(); + b.cleanup(); + } + }); +}); + +describe("pruneStaleReviewWorktrees", () => { + it("removes worktree dirs older than the threshold", () => { + const wt = createReviewWorktree(repoRoot, { label: "stale" }); + // Don't call wt.cleanup() — simulate a crashed run. + // Force the directory's mtime backward by 7 hours. + const oldTime = (Date.now() - 7 * 60 * 60 * 1000) / 1000; + fs.utimesSync(wt.path, oldTime, oldTime); + assert.ok(fs.existsSync(wt.path)); + pruneStaleReviewWorktrees(repoRoot, { maxAgeMs: 6 * 60 * 60 * 1000 }); + assert.equal(fs.existsSync(wt.path), false); + }); + + it("leaves fresh worktree dirs alone", () => { + const wt = createReviewWorktree(repoRoot, { label: "fresh" }); + try { + pruneStaleReviewWorktrees(repoRoot, { maxAgeMs: 6 * 60 * 60 * 1000 }); + assert.ok(fs.existsSync(wt.path)); + } finally { + wt.cleanup(); + } + }); +}); diff --git a/tests/sandbox-modes.test.mjs b/tests/sandbox-modes.test.mjs index 6b44a25..12ddf23 100644 --- a/tests/sandbox-modes.test.mjs +++ b/tests/sandbox-modes.test.mjs @@ -12,10 +12,16 @@ import { buildArgs, SANDBOX_READ_ONLY_BASH_TOOLS, SANDBOX_READ_ONLY_TOOLS, + SANDBOX_REVIEW_TOOLS, SANDBOX_TEMP_DIR, SANDBOX_SETTINGS, + REVIEW_MCP_SERVER_NAME, + REVIEW_MCP_TOOL_NAMES, + REVIEW_MCP_ALLOWED_TOOLS, createSandboxSettings, cleanupSandboxSettings, + createReviewMcpConfig, + cleanupReviewMcpConfig, } from "../scripts/lib/claude-cli.mjs"; import { resolvePluginRuntimeRoot } from "../scripts/lib/codex-paths.mjs"; @@ -202,12 +208,15 @@ describe("sandbox settings lifecycle", () => { // --------------------------------------------------------------------------- describe("sandbox settings content", () => { - it("read-only: sandbox enabled, allowWrite temp dir only, no network", () => { + it("read-only: sandbox enabled, allowWrite temp dir only, network unrestricted", () => { const s = SANDBOX_SETTINGS["read-only"]; assert.equal(s.sandbox.enabled, true); - assert.equal(s.sandbox.autoAllowBashIfSandboxed, true); + assert.equal(s.sandbox.autoAllowBashIfSandboxed, false); assert.deepEqual(s.sandbox.filesystem.allowWrite, [SANDBOX_TEMP_DIR]); - assert.deepEqual(s.sandbox.network.allowedDomains, []); + // network block intentionally omitted: review/adversarial-review need network + // for WebFetch/WebSearch and the Claude CLI's own API access. Mutation + // surfaces are closed off by removing Bash from the allowlist instead. + assert.equal(s.sandbox.network, undefined); }); it("workspace-write: sandbox enabled, allowWrite cwd+temp dir, no network", () => { @@ -223,7 +232,9 @@ describe("sandbox settings content", () => { SANDBOX_SETTINGS["read-only"].sandbox.filesystem, SANDBOX_SETTINGS["workspace-write"].sandbox.filesystem ); - assert.deepEqual( + // read-only deliberately leaves network unset (allowed) while workspace-write + // explicitly closes outbound network from Bash. The two modes diverge here. + assert.notDeepEqual( SANDBOX_SETTINGS["read-only"].sandbox.network, SANDBOX_SETTINGS["workspace-write"].sandbox.network ); @@ -256,3 +267,102 @@ describe("mode consistency", () => { assert.equal(argsAllowedTools(args).length, 0); }); }); + +// --------------------------------------------------------------------------- +// 6. Review tool surface — MCP-only git access, no Bash +// --------------------------------------------------------------------------- + +describe("SANDBOX_REVIEW_TOOLS", () => { + it("includes Read, Glob, Grep, WebSearch, WebFetch", () => { + for (const t of ["Read", "Glob", "Grep", "WebSearch", "WebFetch"]) { + assert.ok(SANDBOX_REVIEW_TOOLS.includes(t), `missing ${t}`); + } + }); + + it("does NOT include any Bash entry (Bash patterns are not strictly enforced)", () => { + for (const t of SANDBOX_REVIEW_TOOLS) { + assert.ok( + !/^Bash(\(|$)/.test(t), + `review allowlist must not contain Bash: ${t}` + ); + } + }); + + it("does NOT include Write/Edit/MultiEdit/NotebookEdit/Task", () => { + for (const t of ["Write", "Edit", "MultiEdit", "NotebookEdit", "Task"]) { + assert.ok(!SANDBOX_REVIEW_TOOLS.includes(t), `${t} must not be allowed`); + } + }); + + it("exposes the bundled git MCP tools as mcp____ entries", () => { + for (const name of REVIEW_MCP_TOOL_NAMES) { + const expected = `mcp__${REVIEW_MCP_SERVER_NAME}__${name}`; + assert.ok( + SANDBOX_REVIEW_TOOLS.includes(expected), + `missing MCP tool entry: ${expected}` + ); + assert.ok(REVIEW_MCP_ALLOWED_TOOLS.includes(expected)); + } + }); + + it("REVIEW_MCP_TOOL_NAMES covers diff, log, show, blame, status, grep, ls_files", () => { + for (const expected of ["diff", "log", "show", "blame", "status", "grep", "ls_files"]) { + assert.ok(REVIEW_MCP_TOOL_NAMES.includes(expected), `missing ${expected}`); + } + }); +}); + +// --------------------------------------------------------------------------- +// 7. createReviewMcpConfig +// --------------------------------------------------------------------------- + +describe("createReviewMcpConfig", () => { + it("writes a JSON file that registers the gitReview MCP server", () => { + withTempCodexHome(() => { + const filePath = createReviewMcpConfig("/tmp/cc-review-fake-root"); + try { + const content = JSON.parse(fs.readFileSync(filePath, "utf8")); + assert.ok(content.mcpServers); + const server = content.mcpServers[REVIEW_MCP_SERVER_NAME]; + assert.ok(server, "gitReview server must be present"); + assert.equal(typeof server.command, "string"); + assert.ok(Array.isArray(server.args)); + assert.ok(server.args.some((a) => a.endsWith("claude-companion.mjs"))); + assert.ok(server.args.includes("mcp-git")); + assert.equal(server.env.CC_GIT_ROOT, "/tmp/cc-review-fake-root"); + } finally { + cleanupReviewMcpConfig(filePath); + } + }); + }); + + it("rejects missing/blank gitRoot", () => { + withTempCodexHome(() => { + assert.throws(() => createReviewMcpConfig(""), /gitRoot is required/); + assert.throws(() => createReviewMcpConfig(null), /gitRoot is required/); + assert.throws(() => createReviewMcpConfig(undefined), /gitRoot is required/); + }); + }); +}); + +// --------------------------------------------------------------------------- +// 8. buildArgs — review mode (mcpConfigFile + strictMcpConfig + new allowlist) +// --------------------------------------------------------------------------- + +describe("buildArgs review mode", () => { + it("emits --mcp-config when mcpConfigFile is provided", () => { + const args = buildArgs("p", { mcpConfigFile: "/tmp/mcp.json" }); + assert.ok(argsHas(args, "--mcp-config", "/tmp/mcp.json")); + }); + + it("emits --strict-mcp-config when strictMcpConfig is set", () => { + const args = buildArgs("p", { strictMcpConfig: true }); + assert.ok(args.includes("--strict-mcp-config")); + }); + + it("can express the review allowlist via --allowedTools per entry", () => { + const args = buildArgs("p", { allowedTools: SANDBOX_REVIEW_TOOLS }); + const allowed = argsAllowedTools(args); + assert.deepEqual([...allowed].sort(), [...SANDBOX_REVIEW_TOOLS].sort()); + }); +}); From 9509c06fb28c0d8273677631b404c43efc98aa47 Mon Sep 17 00:00:00 2001 From: jinku Date: Mon, 11 May 2026 15:31:52 -0700 Subject: [PATCH 3/4] fix(review): address Codex review findings on the worktree+MCP isolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six findings from the Codex review pass on commit 0d9bf09, all addressed: 1. Working-tree reviews were silently incomplete. The previous code always built a worktree at HEAD, so staged/unstaged/untracked changes disappeared from MCP git output — `status` reported clean and `diff` showed nothing. Replace `createReviewWorktree` at the review entry point with `createReviewIsolation`, which keeps the worktree for branch reviews and runs in the original repo for working-tree reviews. Containment for working-tree mode now relies on the Bash-free allowlist + read-only sandbox (the same surface that holds for the worktree case). 2. Reject refs that start with `-` (e.g. `-p`, `--ext-diff`). The previous `REF_PATTERN` allowed leading hyphens, so a "ref" could be passed to git as an option since refs are emitted before the `--` separator. Add an explicit guard + tests covering the dash-prefix class. 3. `runMcpGitServer` was suppressing `-32600 Invalid Request` responses because the response gate keyed off `request.id`. Malformed payloads (arrays, scalars) have no id, so the error never reached the caller. Send error responses regardless of request-id presence; keep the notification rule for valid no-id requests. 4. `git worktree add` failures could leave behind a `.git/worktrees/...` bookkeeping entry. Run `git worktree prune` on the failure path so partial setup does not strand metadata. 5. `kill -9` mid-run could leak `sandbox/cc-sandbox-*.json` and `mcp/cc-mcp-*.json` files forever, with the MCP file containing a stale `CC_GIT_ROOT`. Add `pruneStaleSandboxSettings` and `pruneStaleReviewMcpConfigs` sweepers (same 6h threshold as the worktree sweeper) and call them at review start. 6. `createReviewIsolation` is moved to the same module as the worktree helpers so it is unit-testable. Tests cover both modes plus the new stdio framing behavior (child-process round-trip of `mcp-git` confirming the -32600 error path actually emits a response). The third Codex finding — that `Read`/`Glob`/`Grep` plus an open network leave a broader exfiltration surface than the MCP-only git boundary suggests — is intentional for now (the user explicitly asked for network access). It is documented as a known trade-off rather than fixed in this commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/claude-companion.mjs | 26 +++++++---- scripts/lib/claude-cli.mjs | 50 ++++++++++++++++++++ scripts/lib/mcp-git.mjs | 19 +++++++- scripts/lib/review-worktree.mjs | 40 ++++++++++++++++ tests/mcp-git.test.mjs | 82 +++++++++++++++++++++++++++++++++ tests/review-worktree.test.mjs | 46 ++++++++++++++++++ 6 files changed, 252 insertions(+), 11 deletions(-) diff --git a/scripts/claude-companion.mjs b/scripts/claude-companion.mjs index 825d4f2..414feb5 100644 --- a/scripts/claude-companion.mjs +++ b/scripts/claude-companion.mjs @@ -46,9 +46,11 @@ import { cleanupSandboxSettings, createReviewMcpConfig, cleanupReviewMcpConfig, + pruneStaleSandboxSettings, + pruneStaleReviewMcpConfigs, } from "./lib/claude-cli.mjs"; import { - createReviewWorktree, + createReviewIsolation, pruneStaleReviewWorktrees, } from "./lib/review-worktree.mjs"; import { readStdinIfPiped } from "./lib/fs.mjs"; @@ -472,8 +474,10 @@ async function executeReviewRun(request) { ensureClaudeReady(request.cwd); ensureGitRepository(request.cwd); - // Sweep dead worktrees from previous crashed runs before allocating a new one. + // Sweep dead resources from previous crashed runs before allocating new ones. try { pruneStaleReviewWorktrees(request.cwd); } catch {} + try { pruneStaleSandboxSettings(); } catch {} + try { pruneStaleReviewMcpConfigs(); } catch {} const target = resolveReviewTarget(request.cwd, { base: request.base, @@ -489,11 +493,11 @@ async function executeReviewRun(request) { let result; const sandboxSettingsFile = createSandboxSettings("read-only"); try { - const worktree = createReviewWorktree(request.cwd, { label: "review" }); + const isolation = createReviewIsolation(request.cwd, target, { label: "review" }); try { - const mcpConfigFile = createReviewMcpConfig(worktree.path); + const mcpConfigFile = createReviewMcpConfig(isolation.gitRoot); try { - result = await runClaudeReview(worktree.path, prompt, { + result = await runClaudeReview(isolation.cwd, prompt, { model: request.model, effort: request.effort, onProgress: request.onProgress, @@ -507,7 +511,7 @@ async function executeReviewRun(request) { cleanupReviewMcpConfig(mcpConfigFile); } } finally { - worktree.cleanup(); + isolation.cleanup(); } } finally { cleanupSandboxSettings(sandboxSettingsFile); @@ -556,12 +560,14 @@ async function executeReviewRun(request) { let result; const sandboxSettingsFile = createSandboxSettings("read-only"); try { - const worktree = createReviewWorktree(context.repoRoot, { label: "adversarial-review" }); + const isolation = createReviewIsolation(context.repoRoot, target, { + label: "adversarial-review", + }); try { - const mcpConfigFile = createReviewMcpConfig(worktree.path); + const mcpConfigFile = createReviewMcpConfig(isolation.gitRoot); try { result = await runClaudeAdversarialReview( - worktree.path, + isolation.cwd, prompt, schema, { @@ -579,7 +585,7 @@ async function executeReviewRun(request) { cleanupReviewMcpConfig(mcpConfigFile); } } finally { - worktree.cleanup(); + isolation.cleanup(); } } finally { cleanupSandboxSettings(sandboxSettingsFile); diff --git a/scripts/lib/claude-cli.mjs b/scripts/lib/claude-cli.mjs index 34f5713..131ac3f 100644 --- a/scripts/lib/claude-cli.mjs +++ b/scripts/lib/claude-cli.mjs @@ -597,6 +597,56 @@ export function cleanupReviewMcpConfig(filePath) { } } +// --------------------------------------------------------------------------- +// Stale tmp sweepers — reclaim files left behind by SIGKILL/crashes. +// --------------------------------------------------------------------------- + +function pruneStaleTempFiles(subdir, options = {}) { + const prefix = options.prefix; + const maxAgeMs = options.maxAgeMs ?? 6 * 60 * 60 * 1000; + const dir = path.join(resolvePluginRuntimeRoot(), subdir); + let entries; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + const now = Date.now(); + for (const entry of entries) { + if (!entry.isFile()) continue; + if (prefix && !entry.name.startsWith(prefix)) continue; + const full = path.join(dir, entry.name); + let stat; + try { + stat = fs.statSync(full); + } catch { + continue; + } + if (now - stat.mtimeMs < maxAgeMs) continue; + try { + fs.unlinkSync(full); + } catch { + // Best effort: leave on disk rather than crash callers. + } + } +} + +/** + * Sweep sandbox-settings JSON files left behind by crashes. Call this at the + * start of any flow that creates sandbox settings so they do not accumulate. + */ +export function pruneStaleSandboxSettings(options = {}) { + pruneStaleTempFiles("sandbox", { prefix: "cc-sandbox-", ...options }); +} + +/** + * Sweep review MCP config JSON files left behind by crashes. The same SIGKILL + * window that strands a worktree can strand the MCP config; clean both. + */ +export function pruneStaleReviewMcpConfigs(options = {}) { + pruneStaleTempFiles("mcp", { prefix: "cc-mcp-", ...options }); +} + // --------------------------------------------------------------------------- // Model & Effort Mapping // --------------------------------------------------------------------------- diff --git a/scripts/lib/mcp-git.mjs b/scripts/lib/mcp-git.mjs index bf7b98c..294aba5 100644 --- a/scripts/lib/mcp-git.mjs +++ b/scripts/lib/mcp-git.mjs @@ -50,6 +50,12 @@ function ensureValidRef(ref, label = "ref") { if (!REF_PATTERN.test(ref)) { throw new Error(`${label} contains characters that are not allowed`); } + // A leading `-` would be interpreted by git as an option (e.g., `-p`, + // `--ext-diff`) rather than a ref. Reject explicitly even though `--` follows + // ref tokens elsewhere, since refs are emitted *before* the `--` separator. + if (ref.startsWith("-")) { + throw new Error(`${label} may not start with '-' (interpreted as flag): ${ref}`); + } return ref; } @@ -502,7 +508,18 @@ export async function runMcpGitServer({ continue; } const response = handleMcpRequest(request, root); - if (response && request.id !== undefined && request.id !== null) { + if (!response) { + continue; + } + // Error responses (including -32600 for non-object payloads) must always + // be emitted, even when the request has no `id`; otherwise the caller has + // no signal that the framing was wrong. Result responses only flow back + // when the request had an id (i.e., was a request, not a notification). + const hasRequestId = + isPlainObject(request) && + request.id !== undefined && + request.id !== null; + if (response.error || hasRequestId) { writeResponse(response); } } diff --git a/scripts/lib/review-worktree.mjs b/scripts/lib/review-worktree.mjs index e3e710d..b82792e 100644 --- a/scripts/lib/review-worktree.mjs +++ b/scripts/lib/review-worktree.mjs @@ -86,7 +86,11 @@ export function createReviewWorktree(repoRoot, { label = "review", ref = "HEAD" commit, ]); if (created.status !== 0) { + // `git worktree add` may have partially registered a `.git/worktrees/` + // bookkeeping entry before failing on checkout or ENOSPC. Remove the + // filesystem directory and then prune so the partial entry is reclaimed. cleanupWorktreeDir(worktreePath); + runGit(repoRoot, ["worktree", "prune"]); throw new Error( `git worktree add failed: ${(created.stderr ?? "").trim() || "unknown error"}` ); @@ -119,6 +123,42 @@ function cleanupWorktreeDir(worktreePath) { } } +/** + * Choose how to expose the repository to a review run. + * + * For branch reviews we run inside an ephemeral worktree checked out at the + * branch tip: that gives us mutation isolation without losing any of the + * commits Claude needs to inspect. + * + * For working-tree reviews we deliberately *do not* create a worktree. A + * worktree-from-HEAD would hide the very staged/unstaged/untracked changes + * that the reviewer is supposed to inspect — `git status` would report clean, + * `git diff` would show nothing, and the MCP server pointed at the worktree + * would mislead Claude into thinking the repo is unchanged. Instead we run in + * the original repo and rely on the Bash-free allowlist for containment. + * + * Returns `{ cwd, gitRoot, cleanup }`. `gitRoot` is the path the MCP git server + * should be rooted at; `cwd` is what the Claude CLI should treat as the + * working directory. + */ +export function createReviewIsolation(repoRoot, target, { label = "review" } = {}) { + if (target?.mode === "working-tree") { + return { + cwd: repoRoot, + gitRoot: repoRoot, + cleanup: () => {}, + isolated: false, + }; + } + const worktree = createReviewWorktree(repoRoot, { label }); + return { + cwd: worktree.path, + gitRoot: worktree.path, + cleanup: () => worktree.cleanup(), + isolated: true, + }; +} + /** * Sweep stale worktrees left behind by crashes. Called on every review start * so that long-running plugin installs do not accumulate dead worktrees. diff --git a/tests/mcp-git.test.mjs b/tests/mcp-git.test.mjs index b7a3040..ed77f36 100644 --- a/tests/mcp-git.test.mjs +++ b/tests/mcp-git.test.mjs @@ -177,6 +177,14 @@ describe("tool: log", () => { assert.match(res.result.content[0].text, /refs/); }); + it("rejects refs that start with '-' (would be interpreted as a git flag)", () => { + for (const ref of ["-p", "--ext-diff", "--help", "-"]) { + const res = callTool("log", { refs: ref }); + assert.equal(res.result.isError, true, `should reject ${ref}`); + assert.match(res.result.content[0].text, /flag|-/); + } + }); + it("accepts dotted ref ranges", () => { const res = callTool("log", { refs: "HEAD~1..HEAD", limit: 5 }); assert.equal(res.result.isError, false); @@ -292,3 +300,77 @@ describe("TOOL_DEFINITIONS", () => { } }); }); + +// --------------------------------------------------------------------------- +// Stdio framing — runMcpGitServer end-to-end via the companion subcommand +// --------------------------------------------------------------------------- + +describe("runMcpGitServer stdio framing", () => { + const companion = path.join( + path.dirname(new URL(import.meta.url).pathname), + "..", + "scripts", + "claude-companion.mjs" + ); + + function runServer(inputLines, env = {}) { + const child = spawnSync(process.execPath, [companion, "mcp-git"], { + input: inputLines.map((line) => `${line}\n`).join(""), + encoding: "utf8", + env: { ...process.env, CC_GIT_ROOT: repoRoot, ...env }, + timeout: 10_000, + }); + return child; + } + + it("emits a response for a valid initialize request", () => { + const child = runServer([ + JSON.stringify({ jsonrpc: "2.0", id: 1, method: "initialize", params: {} }), + ]); + assert.equal(child.status, 0); + const lines = child.stdout.trim().split("\n").filter(Boolean); + assert.equal(lines.length, 1); + const res = JSON.parse(lines[0]); + assert.equal(res.id, 1); + assert.ok(res.result?.serverInfo); + }); + + it("emits an Invalid Request error even when the payload is not an object", () => { + const child = runServer(["[1,2,3]", "true", "\"hello\"", "42"]); + assert.equal(child.status, 0); + const lines = child.stdout.trim().split("\n").filter(Boolean); + // One -32600 response per malformed payload. + assert.equal(lines.length, 4); + for (const line of lines) { + const res = JSON.parse(line); + assert.equal(res.error.code, -32600); + assert.equal(res.id, null); + } + }); + + it("emits Parse error for non-JSON lines", () => { + const child = runServer(["this is not json"]); + assert.equal(child.status, 0); + const res = JSON.parse(child.stdout.trim()); + assert.equal(res.error.code, -32700); + }); + + it("does not respond to notifications (no id)", () => { + const child = runServer([ + JSON.stringify({ jsonrpc: "2.0", method: "notifications/initialized" }), + ]); + assert.equal(child.status, 0); + assert.equal(child.stdout.trim(), ""); + }); + + it("exits non-zero when CC_GIT_ROOT is missing", () => { + const child = spawnSync(process.execPath, [companion, "mcp-git"], { + input: "", + encoding: "utf8", + env: { ...process.env, CC_GIT_ROOT: "" }, + timeout: 5_000, + }); + assert.notEqual(child.status, 0); + assert.match(child.stderr, /CC_GIT_ROOT/); + }); +}); diff --git a/tests/review-worktree.test.mjs b/tests/review-worktree.test.mjs index 2d0da30..19a4fec 100644 --- a/tests/review-worktree.test.mjs +++ b/tests/review-worktree.test.mjs @@ -10,6 +10,7 @@ import os from "node:os"; import path from "node:path"; import { + createReviewIsolation, createReviewWorktree, pruneStaleReviewWorktrees, resolveBaseRef, @@ -147,6 +148,51 @@ describe("createReviewWorktree", () => { }); }); +describe("createReviewIsolation", () => { + it("returns the original repoRoot for working-tree mode (no worktree)", () => { + const iso = createReviewIsolation(repoRoot, { mode: "working-tree", label: "wt" }); + try { + assert.equal(iso.cwd, repoRoot); + assert.equal(iso.gitRoot, repoRoot); + assert.equal(iso.isolated, false); + } finally { + iso.cleanup(); + } + }); + + it("does not create a worktree directory for working-tree mode", () => { + const root = path.join( + process.env.CODEX_HOME, + "plugins", "data", "cc", "runtime", "review-worktrees" + ); + const countEntries = () => { + try { + return fs.readdirSync(root, { withFileTypes: true }).length; + } catch { + return 0; + } + }; + const before = countEntries(); + const iso = createReviewIsolation(repoRoot, { mode: "working-tree" }); + const after = countEntries(); + assert.equal(after, before); + iso.cleanup(); + }); + + it("creates and cleans up an ephemeral worktree for branch mode", () => { + const iso = createReviewIsolation(repoRoot, { mode: "branch" }, { label: "iso-branch" }); + try { + assert.notEqual(iso.cwd, repoRoot); + assert.equal(iso.gitRoot, iso.cwd); + assert.equal(iso.isolated, true); + assert.ok(fs.existsSync(iso.cwd)); + } finally { + iso.cleanup(); + assert.equal(fs.existsSync(iso.cwd), false); + } + }); +}); + describe("pruneStaleReviewWorktrees", () => { it("removes worktree dirs older than the threshold", () => { const wt = createReviewWorktree(repoRoot, { label: "stale" }); From eff2cdcaf38910632d956b661d0c3aaf066b68e0 Mon Sep 17 00:00:00 2001 From: jinku Date: Mon, 11 May 2026 16:49:20 -0700 Subject: [PATCH 4/4] =?UTF-8?q?release:=201.2.0=20=E2=80=94=20drop=20env?= =?UTF-8?q?=20scrubbing,=20finalize=20review=20isolation=20defaults?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the inherited buildClaudeSpawnEnv loopback-proxy scrubbing that was bundled into commit 9327619. The case it protected (a stale ad-hoc shell export of ANTHROPIC_BASE_URL / *_PROXY pointing at a dead loopback URL, while the user is *not* using cco) is uncommon, and the scrubbing's asymmetric behavior — parent Codex fails against the dead proxy, child Claude silently bypasses it — makes diagnosis harder than the symmetric failure mode. cco users already had their entries registered in `~/.claude/settings.json` and were unaffected by the scrubber in either direction. Bump version to 1.2.0 (package.json + .codex-plugin/plugin.json) and add a CHANGELOG entry describing: - the new default Claude model (opus-4.7[1m], xhigh effort) for review, adversarial-review, and rescue/task; with sonnet/haiku per-model effort defaults - the review/adversarial-review isolation rework (ephemeral worktree + bundled read-only git MCP server + Bash-free allowlist) - the read-only sandbox preset leaving network unrestricted - new `--effort` flag exposed on the review skills - stale-resource sweepers for crashed runs Co-Authored-By: Claude Opus 4.7 (1M context) --- .codex-plugin/plugin.json | 2 +- CHANGELOG.md | 8 ++++ package.json | 2 +- scripts/lib/claude-cli.mjs | 71 ----------------------------------- tests/claude-cli.test.mjs | 77 -------------------------------------- 5 files changed, 10 insertions(+), 150 deletions(-) diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json index 027335e..f6183cc 100644 --- a/.codex-plugin/plugin.json +++ b/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "cc", - "version": "1.1.0", + "version": "1.2.0", "description": "Claude Code Plugin for Codex. Delegate code reviews, investigations, and tracked tasks to Claude Code from inside Codex.", "author": { "name": "Sendbird, Inc.", diff --git a/CHANGELOG.md b/CHANGELOG.md index 6117a70..4bab30d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## v1.2.0 + +- Default the Claude model for `review`, `adversarial-review`, and `rescue`/`task` to `opus` (resolved to the 1M-context variant `claude-opus-4-7[1m]`) with `xhigh` effort. The `sonnet` alias resolves to `claude-sonnet-4-6[1m]` and defaults to `high` effort; `haiku` stays on `claude-haiku-4-5` with effort unset. `--model` and `--effort` remain user-overridable; `xhigh` is now a first-class effort level and `max` is reserved for users who explicitly opt in. +- Isolate `review` and `adversarial-review` from the user repo with a three-layer design instead of the previous Bash-pattern allowlist (which the Claude CLI does not strictly enforce — once `Bash` is in the allowlist with any sub-pattern, the entire `Bash` tool opens up). Reviews now run inside an ephemeral `git worktree` checked out at the branch tip (or the original repo for `working-tree` scope, so staged/unstaged/untracked changes remain visible), use a bundled read-only git MCP server (`mcp-git` subcommand) exposing `diff`/`log`/`show`/`blame`/`status`/`grep`/`ls_files` as structured tools with strict ref/path validation, and tighten the allowlist to `Read`, `Glob`, `Grep`, `WebSearch`, `WebFetch`, and `mcp__gitReview__*` only (no `Bash` entry). +- Leave network unrestricted in the `read-only` sandbox preset so `WebFetch`/`WebSearch` and the Claude CLI's own API path keep working; safety comes from removing `Bash` from the allowlist rather than from blocking network. File writes outside the OS temp dir stay blocked. +- Expose `--effort` on `review` and `adversarial-review` and document the new defaults in `SKILL.md`, `README.md`, and the internal `cli-runtime` reference. +- Sweep stranded `review-worktrees/`, `sandbox/`, and `mcp/` runtime files older than six hours at the start of every review to reclaim resources after `kill -9` or crashed runs. + ## v1.1.0 - Restructure the internal Claude runtime and prompt-shaping guidance from pseudo-hidden `SKILL.md` files into plain internal reference documents, while keeping the public `review`, `adversarial-review`, and `rescue` skills self-sufficient on their critical invocation rules. diff --git a/package.json b/package.json index 5e14aef..12adf18 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "cc-plugin-codex", - "version": "1.1.0", + "version": "1.2.0", "description": "Claude Code Plugin for Codex by Sendbird", "type": "module", "author": { diff --git a/scripts/lib/claude-cli.mjs b/scripts/lib/claude-cli.mjs index 131ac3f..c06c9ab 100644 --- a/scripts/lib/claude-cli.mjs +++ b/scripts/lib/claude-cli.mjs @@ -17,16 +17,6 @@ import { normalizePathSlashes, resolvePluginRuntimeRoot } from "./codex-paths.mj import { getProcessIdentity, validateProcessIdentity } from "./process.mjs"; const CLAUDE_BIN = "claude"; -const CLAUDE_LOCAL_OVERRIDE_ENV_KEYS = [ - "ANTHROPIC_BASE_URL", - "ANTHROPIC_API_BASE_URL", - "HTTP_PROXY", - "HTTPS_PROXY", - "ALL_PROXY", - "http_proxy", - "https_proxy", - "all_proxy", -]; export const MAX_STREAM_PARSER_UNKNOWN_EVENTS = 50; export const MAX_STREAM_PARSER_PARSE_ERRORS = 50; export const MAX_STREAM_PARSER_TOOL_USES = 256; @@ -82,66 +72,6 @@ function appendTextTail(existing, chunk, maxBytes) { return sliceTextTailByBytes(next, maxBytes); } -function resolveClaudeSettingsPath(settingsPath = null) { - if (settingsPath) { - return settingsPath; - } - const claudeConfigDir = process.env.CLAUDE_CONFIG_DIR || path.join(os.homedir(), ".claude"); - return path.join(claudeConfigDir, "settings.json"); -} - -function loadClaudeConfiguredEnv(settingsPath = null) { - const resolvedPath = resolveClaudeSettingsPath(settingsPath); - try { - if (!fs.existsSync(resolvedPath)) { - return {}; - } - const parsed = JSON.parse(fs.readFileSync(resolvedPath, "utf8")); - if (parsed?.env && typeof parsed.env === "object" && !Array.isArray(parsed.env)) { - return parsed.env; - } - return {}; - } catch { - return null; - } -} - -function isLoopbackUrl(rawValue) { - if (!rawValue || typeof rawValue !== "string") { - return false; - } - try { - const parsed = new URL(rawValue); - return new Set(["127.0.0.1", "localhost", "::1", "[::1]"]).has(parsed.hostname); - } catch { - return false; - } -} - -export function buildClaudeSpawnEnv(baseEnv = process.env, options = {}) { - const nextEnv = { ...baseEnv }; - const configuredEnv = Object.prototype.hasOwnProperty.call(options, "configuredEnv") - ? options.configuredEnv - : loadClaudeConfiguredEnv(options.settingsPath ?? null); - - // If the Claude settings file is unreadable, preserve the inherited env. - if (configuredEnv == null) { - return nextEnv; - } - - for (const key of CLAUDE_LOCAL_OVERRIDE_ENV_KEYS) { - const value = nextEnv[key]; - if (!value || Object.prototype.hasOwnProperty.call(configuredEnv, key)) { - continue; - } - if (isLoopbackUrl(value)) { - delete nextEnv[key]; - } - } - - return nextEnv; -} - // --------------------------------------------------------------------------- // Availability & Auth // --------------------------------------------------------------------------- @@ -792,7 +722,6 @@ export async function runClaudeTurn(cwd, prompt, options = {}) { cwd, detached: true, // new process group for safe cancellation stdio: ["ignore", "pipe", "pipe"], // stdin ignored — prompt is passed as CLI arg - env: buildClaudeSpawnEnv(process.env), }); let pidIdentity = null; diff --git a/tests/claude-cli.test.mjs b/tests/claude-cli.test.mjs index 3cbc5b1..1063669 100644 --- a/tests/claude-cli.test.mjs +++ b/tests/claude-cli.test.mjs @@ -13,7 +13,6 @@ import { resolveDefaultModel, resolveDefaultEffort, buildArgs, - buildClaudeSpawnEnv, MODEL_ALIASES, EFFORT_ALIASES, VALID_EFFORTS, @@ -722,82 +721,6 @@ describe("buildArgs", () => { }); }); -// =========================================================================== -// buildClaudeSpawnEnv -// =========================================================================== - -describe("buildClaudeSpawnEnv", () => { - it("removes an inherited localhost Anthropic base URL when settings no longer define it", () => { - const env = buildClaudeSpawnEnv( - { - ANTHROPIC_BASE_URL: "http://127.0.0.1:55453", - CLAUDE_CODE_EFFORT_LEVEL: "xhigh", - }, - { configuredEnv: { CLAUDE_CODE_EFFORT_LEVEL: "xhigh" } } - ); - - assert.equal(env.ANTHROPIC_BASE_URL, undefined); - assert.equal(env.CLAUDE_CODE_EFFORT_LEVEL, "xhigh"); - }); - - it("keeps an explicitly configured localhost base URL", () => { - const env = buildClaudeSpawnEnv( - { ANTHROPIC_BASE_URL: "http://127.0.0.1:55453" }, - { configuredEnv: { ANTHROPIC_BASE_URL: "http://127.0.0.1:55453" } } - ); - - assert.equal(env.ANTHROPIC_BASE_URL, "http://127.0.0.1:55453"); - }); - - it("keeps non-local remote overrides even when they are not in settings", () => { - const env = buildClaudeSpawnEnv( - { ANTHROPIC_BASE_URL: "https://gateway.example.com" }, - { configuredEnv: {} } - ); - - assert.equal(env.ANTHROPIC_BASE_URL, "https://gateway.example.com"); - }); - - it("removes stale localhost proxy env vars too", () => { - const env = buildClaudeSpawnEnv( - { - ALL_PROXY: "http://localhost:8083", - HTTPS_PROXY: "http://localhost:8080", - HTTP_PROXY: "http://127.0.0.1:8787", - https_proxy: "http://localhost:8081", - http_proxy: "http://127.0.0.1:8788", - all_proxy: "http://localhost:8082", - }, - { configuredEnv: {} } - ); - - assert.equal(env.ALL_PROXY, undefined); - assert.equal(env.HTTPS_PROXY, undefined); - assert.equal(env.HTTP_PROXY, undefined); - assert.equal(env.https_proxy, undefined); - assert.equal(env.http_proxy, undefined); - assert.equal(env.all_proxy, undefined); - }); - - it("removes IPv6 loopback overrides too", () => { - const env = buildClaudeSpawnEnv( - { ANTHROPIC_BASE_URL: "http://[::1]:55453" }, - { configuredEnv: {} } - ); - - assert.equal(env.ANTHROPIC_BASE_URL, undefined); - }); - - it("preserves inherited env when Claude settings are unreadable", () => { - const env = buildClaudeSpawnEnv( - { ANTHROPIC_BASE_URL: "http://127.0.0.1:55453" }, - { configuredEnv: null } - ); - - assert.equal(env.ANTHROPIC_BASE_URL, "http://127.0.0.1:55453"); - }); -}); - // =========================================================================== // SANDBOX_READ_ONLY_TOOLS constant // ===========================================================================