diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json index 027335e..f6183cc 100644 --- a/.codex-plugin/plugin.json +++ b/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "cc", - "version": "1.1.0", + "version": "1.2.0", "description": "Claude Code Plugin for Codex. Delegate code reviews, investigations, and tracked tasks to Claude Code from inside Codex.", "author": { "name": "Sendbird, Inc.", diff --git a/CHANGELOG.md b/CHANGELOG.md index 6117a70..4bab30d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## v1.2.0 + +- Default the Claude model for `review`, `adversarial-review`, and `rescue`/`task` to `opus` (resolved to the 1M-context variant `claude-opus-4-7[1m]`) with `xhigh` effort. The `sonnet` alias resolves to `claude-sonnet-4-6[1m]` and defaults to `high` effort; `haiku` stays on `claude-haiku-4-5` with effort unset. `--model` and `--effort` remain user-overridable; `xhigh` is now a first-class effort level and `max` is reserved for users who explicitly opt in. +- Isolate `review` and `adversarial-review` from the user repo with a three-layer design instead of the previous Bash-pattern allowlist (which the Claude CLI does not strictly enforce — once `Bash` is in the allowlist with any sub-pattern, the entire `Bash` tool opens up). Reviews now run inside an ephemeral `git worktree` checked out at the branch tip (or the original repo for `working-tree` scope, so staged/unstaged/untracked changes remain visible), use a bundled read-only git MCP server (`mcp-git` subcommand) exposing `diff`/`log`/`show`/`blame`/`status`/`grep`/`ls_files` as structured tools with strict ref/path validation, and tighten the allowlist to `Read`, `Glob`, `Grep`, `WebSearch`, `WebFetch`, and `mcp__gitReview__*` only (no `Bash` entry). 
+- Leave network unrestricted in the `read-only` sandbox preset so `WebFetch`/`WebSearch` and the Claude CLI's own API path keep working; safety comes from removing `Bash` from the allowlist rather than from blocking network. File writes outside the OS temp dir stay blocked. +- Expose `--effort` on `review` and `adversarial-review` and document the new defaults in `SKILL.md`, `README.md`, and the internal `cli-runtime` reference. +- Sweep stranded `review-worktrees/`, `sandbox/`, and `mcp/` runtime files older than six hours at the start of every review to reclaim resources after `kill -9` or crashed runs. + ## v1.1.0 - Restructure the internal Claude runtime and prompt-shaping guidance from pseudo-hidden `SKILL.md` files into plain internal reference documents, while keeping the public `review`, `adversarial-review`, and `rescue` skills self-sufficient on their critical invocation rules. diff --git a/README.md b/README.md index 36ccd93..bf39c4e 100644 --- a/README.md +++ b/README.md @@ -128,14 +128,17 @@ Quick routing rule: Standard read-only review of your current work. ```text -$cc:review # review uncommitted changes +$cc:review # review uncommitted changes (default: opus + xhigh effort) $cc:review --base main # review branch vs main $cc:review --scope branch # explicitly compare branch tip to base $cc:review --background # run in background, check with $cc:status later -$cc:review --model sonnet # use a specific Claude model +$cc:review --model sonnet # switch to sonnet (defaults to high effort) +$cc:review --model opus --effort high # opus with a lighter effort ``` -**Flags:** `--base `, `--scope `, `--wait`, `--background`, `--model ` +**Flags:** `--base `, `--scope `, `--wait`, `--background`, `--model `, `--effort ` + +**Defaults:** model `opus` (resolves to `claude-opus-4-7[1m]`, the 1M-context variant) with `xhigh` effort. If you pick `sonnet`, it resolves to `claude-sonnet-4-6[1m]` (also 1M context) and the default effort drops to `high`. 
`haiku` resolves to `claude-haiku-4-5` and has no effort setting. Pass `--model` and `--effort` to override. Scope `auto` (the default) inspects `git status` and chooses between working-tree and branch automatically. @@ -179,8 +182,8 @@ $cc:rescue --model sonnet --effort medium investigate the flaky test | `--resume-last` | Alias for `--resume` | | `--fresh` | Force a new task (don't resume) | | `--write` | Allow file edits (default) | -| `--model ` | Claude model (`sonnet`, `haiku`, or full ID) | -| `--effort ` | Reasoning effort: `low`, `medium`, `high`, `max` | +| `--model ` | Claude model (`opus`, `sonnet`, `haiku`, or full ID; defaults to `opus`. The `opus` and `sonnet` aliases resolve to their 1M-context variants `claude-opus-4-7[1m]` and `claude-sonnet-4-6[1m]`.) | +| `--effort ` | Reasoning effort: `low`, `medium`, `high`, `xhigh`, `max` (default: `xhigh` for opus, `high` for sonnet, unset for haiku) | | `--prompt-file ` | Read task description from a file | **Resume behavior:** If you don't pass `--resume` or `--fresh`, rescue checks for a resumable Claude session and asks once whether to continue or start fresh. Your phrasing guides the recommendation — "continue the last run" → resume, "start over" → fresh. diff --git a/internal-skills/cli-runtime/runtime.md b/internal-skills/cli-runtime/runtime.md index 77b78d3..c638ec2 100644 --- a/internal-skills/cli-runtime/runtime.md +++ b/internal-skills/cli-runtime/runtime.md @@ -24,8 +24,7 @@ Command selection: Routing controls: - Treat `--model`, `--effort`, `--resume`, `--resume-last`, `--fresh`, `--prompt-file`, `--view-state`, `--owner-session-id`, and `--job-id` as routing controls, not task text. -- Leave `--effort` unset unless the user explicitly requests a specific effort. -- Leave model unset by default. Add `--model` only when the user explicitly asks for one. +- Leave `--model` and `--effort` unset unless the user explicitly asks for a specific model or effort. 
The companion command applies these defaults itself: model defaults to `opus`, effort defaults to `xhigh` for opus, `high` for sonnet, and is left unset for haiku. - `--view-state on-success` means the user will see this companion result in the current turn, so the companion may mark it viewed on success. - `--view-state defer` means the parent is not waiting, so the companion must leave the result unread until the user explicitly checks it. - `--owner-session-id ` is an internal parent-session routing control. Preserve it when present so tracked jobs remain visible to the parent session's `$cc:status` / `$cc:result`. diff --git a/package.json b/package.json index 5e14aef..12adf18 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "cc-plugin-codex", - "version": "1.1.0", + "version": "1.2.0", "description": "Claude Code Plugin for Codex by Sendbird", "type": "module", "author": { diff --git a/scripts/claude-companion.mjs b/scripts/claude-companion.mjs index 9e5df0e..414feb5 100644 --- a/scripts/claude-companion.mjs +++ b/scripts/claude-companion.mjs @@ -10,9 +10,11 @@ * * Adapted from codex-companion.mjs: * - Uses claude-cli.mjs instead of app-server/broker - * - MODEL_ALIASES: sonnet -> claude-sonnet-4-6, haiku -> claude-haiku-4-5 - * - Claude CLI effort values: low, medium, high, max - * - Legacy effort aliases: none|minimal -> low, xhigh -> max + * - MODEL_ALIASES: opus -> claude-opus-4-7[1m], sonnet -> claude-sonnet-4-6[1m], haiku -> claude-haiku-4-5 + * - Default model when --model is unset: opus + * - Default effort by model: opus -> xhigh, sonnet -> high, haiku -> unset + * - Claude CLI effort values: low, medium, high, xhigh, max + * - Legacy effort aliases: none|minimal -> low * - Review gate matches upstream setup semantics: Stop hook runs when enabled * * Subcommands: @@ -37,10 +39,20 @@ import { cancelClaudeProcess, MODEL_ALIASES, resolveEffort, + resolveDefaultModel, + resolveDefaultEffort, SANDBOX_READ_ONLY_TOOLS, 
createSandboxSettings, cleanupSandboxSettings, + createReviewMcpConfig, + cleanupReviewMcpConfig, + pruneStaleSandboxSettings, + pruneStaleReviewMcpConfigs, } from "./lib/claude-cli.mjs"; +import { + createReviewIsolation, + pruneStaleReviewWorktrees, +} from "./lib/review-worktree.mjs"; import { readStdinIfPiped } from "./lib/fs.mjs"; import { collectReviewContext, @@ -112,9 +124,9 @@ function printUsage() { [ "Usage:", " node scripts/claude-companion.mjs setup [--enable-review-gate|--disable-review-gate] [--json]", - " node scripts/claude-companion.mjs review [--wait|--background] [--base ] [--scope ]", - " node scripts/claude-companion.mjs adversarial-review [--wait|--background] [--base ] [--scope ] [focus text]", - " node scripts/claude-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", + " node scripts/claude-companion.mjs review [--wait|--background] [--base ] [--scope ] [--model ] [--effort ]", + " node scripts/claude-companion.mjs adversarial-review [--wait|--background] [--base ] [--scope ] [--model ] [--effort ] [focus text]", + " node scripts/claude-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", " node scripts/claude-companion.mjs status [job-id] [--all] [--json]", " node scripts/claude-companion.mjs result [job-id] [--json]", " node scripts/claude-companion.mjs cancel [job-id] [--json]", @@ -462,6 +474,11 @@ async function executeReviewRun(request) { ensureClaudeReady(request.cwd); ensureGitRepository(request.cwd); + // Sweep dead resources from previous crashed runs before allocating new ones. 
+ try { pruneStaleReviewWorktrees(request.cwd); } catch {} + try { pruneStaleSandboxSettings(); } catch {} + try { pruneStaleReviewMcpConfigs(); } catch {} + const target = resolveReviewTarget(request.cwd, { base: request.base, scope: request.scope @@ -470,19 +487,32 @@ async function executeReviewRun(request) { const reviewName = request.reviewName ?? "Review"; if (reviewName === "Review") { - // Standard review via Claude CLI — read-only sandbox + // Standard review via Claude CLI — read-only sandbox + ephemeral worktree. const context = collectReviewContext(request.cwd, target); const prompt = buildReviewPrompt(context); - const sandboxSettingsFile = createSandboxSettings("read-only"); let result; + const sandboxSettingsFile = createSandboxSettings("read-only"); try { - result = await runClaudeReview(request.cwd, prompt, { - model: request.model, - onProgress: request.onProgress, - onSpawn: request.onSpawn, - permissionMode: "dontAsk", - settingsFile: sandboxSettingsFile, - }); + const isolation = createReviewIsolation(request.cwd, target, { label: "review" }); + try { + const mcpConfigFile = createReviewMcpConfig(isolation.gitRoot); + try { + result = await runClaudeReview(isolation.cwd, prompt, { + model: request.model, + effort: request.effort, + onProgress: request.onProgress, + onSpawn: request.onSpawn, + permissionMode: "dontAsk", + settingsFile: sandboxSettingsFile, + mcpConfigFile, + strictMcpConfig: true, + }); + } finally { + cleanupReviewMcpConfig(mcpConfigFile); + } + } finally { + isolation.cleanup(); + } } finally { cleanupSandboxSettings(sandboxSettingsFile); } @@ -523,25 +553,40 @@ async function executeReviewRun(request) { }; } - // Adversarial review with structured output — read-only sandbox + // Adversarial review with structured output — read-only sandbox + ephemeral worktree. 
const context = collectReviewContext(request.cwd, target); const prompt = buildAdversarialReviewPrompt(context, focusText); const schema = readOutputSchema(REVIEW_SCHEMA_PATH); - const sandboxSettingsFile = createSandboxSettings("read-only"); let result; + const sandboxSettingsFile = createSandboxSettings("read-only"); try { - result = await runClaudeAdversarialReview( - context.repoRoot, - prompt, - schema, - { - model: request.model, - onProgress: request.onProgress, - onSpawn: request.onSpawn, - permissionMode: "dontAsk", - settingsFile: sandboxSettingsFile, + const isolation = createReviewIsolation(context.repoRoot, target, { + label: "adversarial-review", + }); + try { + const mcpConfigFile = createReviewMcpConfig(isolation.gitRoot); + try { + result = await runClaudeAdversarialReview( + isolation.cwd, + prompt, + schema, + { + model: request.model, + effort: request.effort, + onProgress: request.onProgress, + onSpawn: request.onSpawn, + permissionMode: "dontAsk", + settingsFile: sandboxSettingsFile, + mcpConfigFile, + strictMcpConfig: true, + } + ); + } finally { + cleanupReviewMcpConfig(mcpConfigFile); } - ); + } finally { + isolation.cleanup(); + } } finally { cleanupSandboxSettings(sandboxSettingsFile); } @@ -825,11 +870,12 @@ function buildReviewRequest({ base, scope, model, + effort, focusText, reviewName, markViewedOnSuccess }) { - return { cwd, base, scope, model, focusText, reviewName, markViewedOnSuccess }; + return { cwd, base, scope, model, effort, focusText, reviewName, markViewedOnSuccess }; } function spawnDetachedReviewWorker(cwd, jobId) { @@ -1183,7 +1229,7 @@ async function resolveLatestResumableSession(cwd, options = {}) { async function handleReviewCommand(argv, config) { const { options, positionals } = parseCommandInput(argv, { - valueOptions: ["base", "scope", "model", "cwd", "view-state", "job-id", "owner-session-id"], + valueOptions: ["base", "scope", "model", "effort", "cwd", "view-state", "job-id", "owner-session-id"], 
booleanOptions: ["json", "background", "wait"], aliasMap: { m: "model" @@ -1205,6 +1251,10 @@ async function handleReviewCommand(argv, config) { Boolean(options.background) ); + const requestedModel = normalizeRequestedModel(options.model); + const resolvedModel = resolveDefaultModel(requestedModel); + const resolvedEffort = resolveDefaultEffort(resolvedModel, options.effort); + await withReleasedReservation(workspaceRoot, explicitJobId, async () => { // Validate inside the reservation guard so failures do not leak markers. config.validateRequest?.(target, focusText); @@ -1227,7 +1277,8 @@ async function handleReviewCommand(argv, config) { cwd, base: options.base, scope: options.scope, - model: options.model, + model: resolvedModel, + effort: resolvedEffort, focusText, reviewName: config.reviewName, markViewedOnSuccess @@ -1248,7 +1299,8 @@ async function handleReviewCommand(argv, config) { cwd, base: options.base, scope: options.scope, - model: options.model, + model: resolvedModel, + effort: resolvedEffort, focusText, reviewName: config.reviewName, onProgress: progress, @@ -1300,8 +1352,10 @@ async function handleTask(argv) { const cwd = resolveCommandCwd(options); const workspaceRoot = resolveCommandWorkspace(options); - const model = normalizeRequestedModel(options.model); - const effort = resolveEffort(options.effort) ?? null; + const requestedModel = normalizeRequestedModel(options.model); + const model = resolveDefaultModel(requestedModel); + const resolvedEffort = resolveDefaultEffort(model, options.effort); + const effort = resolvedEffort ? 
resolveEffort(resolvedEffort) : null; const prompt = readTaskPrompt(cwd, options, positionals); const markViewedOnSuccess = resolveMarkViewedOnSuccess( options["view-state"], @@ -1819,11 +1873,20 @@ async function main() { case "cancel": await handleCancel(argv); break; + case "mcp-git": + await handleMcpGit(argv); + break; default: throw new Error(`Unknown subcommand: ${subcommand}`); } } +async function handleMcpGit(_argv) { + const { runMcpGitServer } = await import("./lib/mcp-git.mjs"); + const exitCode = await runMcpGitServer(); + process.exit(exitCode ?? 0); +} + main().catch((error) => { const message = error instanceof Error ? error.message : String(error); process.stderr.write(`${message}\n`); diff --git a/scripts/lib/claude-cli.mjs b/scripts/lib/claude-cli.mjs index 3cff9f7..c06c9ab 100644 --- a/scripts/lib/claude-cli.mjs +++ b/scripts/lib/claude-cli.mjs @@ -12,6 +12,7 @@ import { randomBytes } from "node:crypto"; import fs from "node:fs"; import os from "node:os"; import path from "node:path"; +import { fileURLToPath } from "node:url"; import { normalizePathSlashes, resolvePluginRuntimeRoot } from "./codex-paths.mjs"; import { getProcessIdentity, validateProcessIdentity } from "./process.mjs"; @@ -364,6 +365,41 @@ export const SANDBOX_READ_ONLY_TOOLS = [ "Agent(explore,plan)", ]; +/** + * MCP server name used for the bundled read-only git MCP server. Exposes tools as + * `mcp____` (see scripts/lib/mcp-git.mjs for the catalog). + */ +export const REVIEW_MCP_SERVER_NAME = "gitReview"; + +export const REVIEW_MCP_TOOL_NAMES = [ + "diff", + "log", + "show", + "blame", + "status", + "grep", + "ls_files", +]; + +export const REVIEW_MCP_ALLOWED_TOOLS = REVIEW_MCP_TOOL_NAMES.map( + (name) => `mcp__${REVIEW_MCP_SERVER_NAME}__${name}` +); + +/** + * Tools exposed to review/adversarial-review runs. Bash is intentionally absent — + * the Claude CLI does not strictly enforce `Bash(:*)` sub-patterns, so any + * Bash entry would open the full Bash surface. 
Git operations are surfaced through + * the bundled read-only git MCP server instead (`REVIEW_MCP_ALLOWED_TOOLS`). + */ +export const SANDBOX_REVIEW_TOOLS = [ + "Read", + "Glob", + "Grep", + "WebSearch", + "WebFetch", + ...REVIEW_MCP_ALLOWED_TOOLS, +]; + // --------------------------------------------------------------------------- // Sandbox Settings — OS-level isolation via Claude Code's sandbox feature. // Written to a temp file and passed via --settings. @@ -372,7 +408,10 @@ export const SANDBOX_READ_ONLY_TOOLS = [ /** * Sandbox presets matching Codex sandbox modes. * - * read-only: no writes at all, no network from Bash. + * read-only: no file writes outside the OS temp dir. Network is allowed so + * that `WebFetch`, `WebSearch`, and the Claude CLI's API path keep + * working; the review allowlist excludes Bash entirely, so there + * is no shell surface to exfiltrate or mutate state through. * workspace-write: Bash can write to cwd + OS temp dir only, no network from Bash. * All tools allowed (no allowedTools restriction). */ @@ -380,13 +419,12 @@ export const SANDBOX_SETTINGS = { "read-only": { sandbox: { enabled: true, - autoAllowBashIfSandboxed: true, + // No Bash in the review allowlist, but keep this flag conservative so that + // any sandbox-aware tool still has to opt in explicitly. + autoAllowBashIfSandboxed: false, filesystem: { allowWrite: [SANDBOX_TEMP_DIR], }, - network: { - allowedDomains: [], - }, }, }, "workspace-write": { @@ -430,22 +468,157 @@ export function cleanupSandboxSettings(filePath) { } } +// --------------------------------------------------------------------------- +// Review MCP config +// --------------------------------------------------------------------------- + +/** + * Resolve the absolute path of the bundled claude-companion.mjs script so the + * `mcp-git` subcommand can be invoked from any cwd. Uses `fileURLToPath` so the + * resolution works on Windows where `new URL(...).pathname` is not a usable + * filesystem path. 
+ */ +function resolveCompanionScriptPath() { + return path.resolve( + path.dirname(fileURLToPath(import.meta.url)), + "..", + "claude-companion.mjs" + ); +} + +/** + * Write an `--mcp-config` JSON file that registers the bundled read-only git + * MCP server. The server's CC_GIT_ROOT env var is set to `gitRoot` so that its + * tool handlers operate strictly inside the review worktree. + */ +export function createReviewMcpConfig(gitRoot) { + if (!gitRoot || typeof gitRoot !== "string") { + throw new Error("createReviewMcpConfig: gitRoot is required"); + } + const companionScript = resolveCompanionScriptPath(); + const config = { + mcpServers: { + [REVIEW_MCP_SERVER_NAME]: { + command: process.execPath, + args: [companionScript, "mcp-git"], + env: { + CC_GIT_ROOT: gitRoot, + }, + }, + }, + }; + + const dir = path.join(resolvePluginRuntimeRoot(), "mcp"); + fs.mkdirSync(dir, { recursive: true, mode: 0o700 }); + const tmpFile = path.join( + dir, + `cc-mcp-${process.pid}-${Date.now().toString(36)}-${randomBytes(6).toString("hex")}.json` + ); + fs.writeFileSync(tmpFile, JSON.stringify(config), { + encoding: "utf8", + mode: 0o600, + }); + return tmpFile; +} + +export function cleanupReviewMcpConfig(filePath) { + if (filePath) { + try { fs.unlinkSync(filePath); } catch {} + } +} + +// --------------------------------------------------------------------------- +// Stale tmp sweepers — reclaim files left behind by SIGKILL/crashes. +// --------------------------------------------------------------------------- + +function pruneStaleTempFiles(subdir, options = {}) { + const prefix = options.prefix; + const maxAgeMs = options.maxAgeMs ?? 
6 * 60 * 60 * 1000; + const dir = path.join(resolvePluginRuntimeRoot(), subdir); + let entries; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + const now = Date.now(); + for (const entry of entries) { + if (!entry.isFile()) continue; + if (prefix && !entry.name.startsWith(prefix)) continue; + const full = path.join(dir, entry.name); + let stat; + try { + stat = fs.statSync(full); + } catch { + continue; + } + if (now - stat.mtimeMs < maxAgeMs) continue; + try { + fs.unlinkSync(full); + } catch { + // Best effort: leave on disk rather than crash callers. + } + } +} + +/** + * Sweep sandbox-settings JSON files left behind by crashes. Call this at the + * start of any flow that creates sandbox settings so they do not accumulate. + */ +export function pruneStaleSandboxSettings(options = {}) { + pruneStaleTempFiles("sandbox", { prefix: "cc-sandbox-", ...options }); +} + +/** + * Sweep review MCP config JSON files left behind by crashes. The same SIGKILL + * window that strands a worktree can strand the MCP config; clean both. 
+ */ +export function pruneStaleReviewMcpConfigs(options = {}) { + pruneStaleTempFiles("mcp", { prefix: "cc-mcp-", ...options }); +} + // --------------------------------------------------------------------------- // Model & Effort Mapping // --------------------------------------------------------------------------- export const MODEL_ALIASES = new Map([ - ["sonnet", "claude-sonnet-4-6"], + ["opus", "claude-opus-4-7[1m]"], + ["sonnet", "claude-sonnet-4-6[1m]"], ["haiku", "claude-haiku-4-5"], ]); export const EFFORT_ALIASES = { none: "low", minimal: "low", - xhigh: "max", }; -export const VALID_EFFORTS = new Set(["low", "medium", "high", "max"]); +export const VALID_EFFORTS = new Set(["low", "medium", "high", "xhigh", "max"]); + +export const DEFAULT_MODEL = "opus"; + +export const DEFAULT_EFFORT_BY_MODEL = new Map([ + ["opus", "xhigh"], + ["claude-opus-4-7", "xhigh"], + ["claude-opus-4-7[1m]", "xhigh"], + ["sonnet", "high"], + ["claude-sonnet-4-6", "high"], + ["claude-sonnet-4-6[1m]", "high"], +]); + +export function resolveDefaultModel(model) { + if (model == null || String(model).trim() === "") { + return DEFAULT_MODEL; + } + return model; +} + +export function resolveDefaultEffort(model, effort) { + if (effort != null && String(effort).trim() !== "") { + return effort; + } + const key = String(model ?? "").trim().toLowerCase(); + return DEFAULT_EFFORT_BY_MODEL.get(key); +} export function resolveModel(model) { if (!model) return undefined; @@ -523,6 +696,12 @@ export function buildArgs(prompt, options = {}) { if (options.settingsFile) { args.push("--settings", options.settingsFile); } + if (options.mcpConfigFile) { + args.push("--mcp-config", options.mcpConfigFile); + } + if (options.strictMcpConfig) { + args.push("--strict-mcp-config"); + } args.push("--", prompt); return args; @@ -625,12 +804,17 @@ export async function runClaudeTurn(cwd, prompt, options = {}) { /** * Execute a review (non-streaming, no session persistence). 
+ * + * The default allowlist is `SANDBOX_REVIEW_TOOLS` (Read/Glob/Grep/Web + the git + * MCP tool surface). Callers that want to run with an alternative allowlist — + * e.g., legacy `SANDBOX_READ_ONLY_TOOLS` for back-compat — can override via + * `options.allowedTools`. Bash is intentionally excluded by default. */ export async function runClaudeReview(cwd, prompt, options = {}) { // Use streaming mode (same as runClaudeTurn) for progress reporting const result = await runClaudeTurn(cwd, prompt, { noSessionPersistence: true, - allowedTools: SANDBOX_READ_ONLY_TOOLS, + allowedTools: SANDBOX_REVIEW_TOOLS, ...options, }); diff --git a/scripts/lib/mcp-git.mjs b/scripts/lib/mcp-git.mjs new file mode 100644 index 0000000..294aba5 --- /dev/null +++ b/scripts/lib/mcp-git.mjs @@ -0,0 +1,532 @@ +/** + * Copyright 2026 Sendbird, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ +/** + * Read-only git MCP server over stdio. + * + * Exposes a small surface of git read-only operations as structured MCP tools so + * that review and adversarial-review runs can inspect commits, diffs, and history + * without exposing the Bash tool. Tool calls land here, argv is constructed inside + * the server (no shell), and the working directory is locked to CC_GIT_ROOT. + */ + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +export const MCP_PROTOCOL_VERSION = "2024-11-05"; +export const MCP_SERVER_NAME = "cc-plugin-codex-git"; +export const MCP_SERVER_VERSION = "1.0.0"; +export const MAX_OUTPUT_BYTES = 64 * 1024 * 1024; +export const MAX_STDIN_BUFFER_BYTES = 1 * 1024 * 1024; + +// Refs may contain letters, digits, and the small set of characters git uses for +// range/relative syntax. We deliberately reject shell metacharacters and spaces +// even though spawnSync does not invoke a shell — keeping the surface narrow makes +// the invariant easy to audit. 
+const REF_PATTERN = /^[A-Za-z0-9._/\-~^@]{1,200}(?:\.\.\.?[A-Za-z0-9._/\-~^@]{1,200})?$/; + +// Pathspecs ride directly to git after the `--` separator so they cannot become +// flags, but we still gate them: no shell metacharacters, no NUL, no leading `-`, +// no parent traversal, and (when absolute) the resolved location must stay inside +// CC_GIT_ROOT. +const PATHSPEC_FORBIDDEN = /[\0\n\r;&|`$<>*?(){}\[\]"'\\]/; + +const LIMIT_DEFAULTS = { + log: 20, + blame: 0, + grep: 200, +}; + +function isPlainObject(value) { + return value !== null && typeof value === "object" && !Array.isArray(value); +} + +function ensureValidRef(ref, label = "ref") { + if (typeof ref !== "string" || !ref) { + throw new Error(`${label} must be a non-empty string`); + } + if (!REF_PATTERN.test(ref)) { + throw new Error(`${label} contains characters that are not allowed`); + } + // A leading `-` would be interpreted by git as an option (e.g., `-p`, + // `--ext-diff`) rather than a ref. Reject explicitly even though `--` follows + // ref tokens elsewhere, since refs are emitted *before* the `--` separator. 
+ if (ref.startsWith("-")) { + throw new Error(`${label} may not start with '-' (interpreted as flag): ${ref}`); + } + return ref; +} + +function ensureValidPath(spec, root) { + if (typeof spec !== "string" || !spec) { + throw new Error("path must be a non-empty string"); + } + if (PATHSPEC_FORBIDDEN.test(spec)) { + throw new Error(`path contains forbidden characters: ${spec}`); + } + if (spec.startsWith("-")) { + throw new Error(`path may not start with '-' (interpreted as flag): ${spec}`); + } + const resolved = path.resolve(root, spec); + const rootResolved = path.resolve(root); + const rel = path.relative(rootResolved, resolved); + if (rel === "" || rel === ".") { + return spec; + } + if (rel.startsWith("..") || path.isAbsolute(rel)) { + throw new Error(`path escapes the git root: ${spec}`); + } + return spec; +} + +function ensureValidPathList(paths, root) { + if (paths == null) return []; + if (!Array.isArray(paths)) { + throw new Error("paths must be an array of strings"); + } + if (paths.length > 64) { + throw new Error("too many paths (max 64)"); + } + return paths.map((p) => ensureValidPath(p, root)); +} + +function ensureBoundedInt(value, { min, max, label }) { + if (value == null) return null; + const num = typeof value === "number" ? 
value : Number.parseInt(String(value), 10); + if (!Number.isFinite(num) || !Number.isInteger(num)) { + throw new Error(`${label} must be an integer`); + } + if (num < min || num > max) { + throw new Error(`${label} must be between ${min} and ${max}`); + } + return num; +} + +function ensureValidBlameRange(range) { + if (range == null) return null; + if (typeof range !== "string") { + throw new Error("range must be a string like '12,40'"); + } + if (!/^\d{1,7}(?:,\d{1,7})?$/.test(range)) { + throw new Error("range must look like '12' or '12,40'"); + } + return range; +} + +function runGit(root, args) { + const result = spawnSync("git", args, { + cwd: root, + encoding: "utf8", + maxBuffer: MAX_OUTPUT_BYTES, + env: { + ...process.env, + // Disable git config that could side-effect (no signing, no pagers, no + // hooks running through git commands we wrap). Use POSIX `:` and `cat` + // builtins that work the same on Linux, macOS, and git-for-Windows. + GIT_PAGER: "cat", + GIT_EDITOR: ":", + GIT_TERMINAL_PROMPT: "0", + }, + timeout: 30_000, + }); + return { + stdout: result.stdout ?? "", + stderr: result.stderr ?? "", + exitCode: result.status, + signal: result.signal ?? null, + error: result.error?.message ?? null, + }; +} + +// --------------------------------------------------------------------------- +// Tool registry +// --------------------------------------------------------------------------- + +export const TOOL_DEFINITIONS = [ + { + name: "diff", + description: + "Show a git diff inside the review worktree. Optional `refs` (e.g., 'HEAD~3..HEAD' or 'origin/main...HEAD') and optional `paths`. Use `stat: true` for a stat-only summary.", + inputSchema: { + type: "object", + additionalProperties: false, + properties: { + refs: { type: "string", description: "Git ref or ref range." 
}, + paths: { + type: "array", + items: { type: "string" }, + description: "Pathspecs relative to the review worktree.", + }, + stat: { type: "boolean", description: "Return --stat summary instead of full diff." }, + head: { + type: "integer", + minimum: 1, + maximum: 5000, + description: "Truncate output to the first N lines.", + }, + }, + }, + }, + { + name: "log", + description: + "List recent commits. Optional `refs`, `paths`, `limit` (default 20), `format` ('oneline' | 'short' | 'full').", + inputSchema: { + type: "object", + additionalProperties: false, + properties: { + refs: { type: "string" }, + paths: { type: "array", items: { type: "string" } }, + limit: { type: "integer", minimum: 1, maximum: 500 }, + format: { type: "string", enum: ["oneline", "short", "medium", "full"] }, + }, + }, + }, + { + name: "show", + description: "Show a single commit (`ref` required). Optional `paths` to restrict to specific files. Optional `stat: true` for stat-only.", + inputSchema: { + type: "object", + additionalProperties: false, + required: ["ref"], + properties: { + ref: { type: "string" }, + paths: { type: "array", items: { type: "string" } }, + stat: { type: "boolean" }, + }, + }, + }, + { + name: "blame", + description: "Show line-by-line authorship of a file. `path` required. Optional `range` ('12,40').", + inputSchema: { + type: "object", + additionalProperties: false, + required: ["path"], + properties: { + path: { type: "string" }, + range: { type: "string", description: "Line range like '12,40' or '12'." }, + }, + }, + }, + { + name: "status", + description: "Show working-tree status. Use `porcelain: true` for machine-readable output.", + inputSchema: { + type: "object", + additionalProperties: false, + properties: { + porcelain: { type: "boolean" }, + }, + }, + }, + { + name: "grep", + description: "Search the tracked tree with `git grep`. `pattern` required. 
Optional `paths`, `limit` (default 200 lines).", + inputSchema: { + type: "object", + additionalProperties: false, + required: ["pattern"], + properties: { + pattern: { type: "string" }, + paths: { type: "array", items: { type: "string" } }, + limit: { type: "integer", minimum: 1, maximum: 5000 }, + ignoreCase: { type: "boolean" }, + word: { type: "boolean", description: "Match whole words only." }, + }, + }, + }, + { + name: "ls_files", + description: "List tracked files. Optional `paths` to filter.", + inputSchema: { + type: "object", + additionalProperties: false, + properties: { + paths: { type: "array", items: { type: "string" } }, + }, + }, + }, +]; + +// --------------------------------------------------------------------------- +// Tool handlers +// --------------------------------------------------------------------------- + +function truncateOutput(text, maxLines) { + if (!maxLines || maxLines <= 0) return text; + const lines = text.split("\n"); + if (lines.length <= maxLines) return text; + return [ + ...lines.slice(0, maxLines), + `... (truncated; ${lines.length - maxLines} more line(s))`, + ].join("\n"); +} + +function formatResult(result, options = {}) { + const { truncateLines, label } = options; + let text = result.stdout || ""; + if (truncateLines && text) { + text = truncateOutput(text, truncateLines); + } + if (!text && result.stderr) { + text = `[git ${label ?? 
"command"} stderr]\n${result.stderr}`; + } + if (result.error) { + text = `${text}\n[error: ${result.error}]`.trim(); + } + const isError = + Boolean(result.error) || + (result.exitCode != null && result.exitCode !== 0 && !result.stdout); + return { text, isError }; +} + +const HANDLERS = { + diff(args, root) { + const cmd = ["diff", "--no-color"]; + if (args?.stat === true) cmd.push("--stat"); + if (args?.refs != null) cmd.push(ensureValidRef(args.refs, "refs")); + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + const result = runGit(root, cmd); + const headLimit = ensureBoundedInt(args?.head, { min: 1, max: 5000, label: "head" }); + return formatResult(result, { truncateLines: headLimit, label: "diff" }); + }, + + log(args, root) { + const cmd = ["log", "--no-color"]; + const limit = ensureBoundedInt(args?.limit, { min: 1, max: 500, label: "limit" }) ?? LIMIT_DEFAULTS.log; + cmd.push(`-n`, String(limit)); + const format = args?.format; + if (format === "oneline" || format == null) { + cmd.push("--oneline"); + } else if (format === "short" || format === "medium" || format === "full") { + cmd.push(`--pretty=${format}`); + } else { + throw new Error(`format must be one of: oneline, short, medium, full`); + } + if (args?.refs != null) cmd.push(ensureValidRef(args.refs, "refs")); + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + return formatResult(runGit(root, cmd), { label: "log" }); + }, + + show(args, root) { + const cmd = ["show", "--no-color"]; + if (args?.stat === true) cmd.push("--stat"); + cmd.push(ensureValidRef(args?.ref, "ref")); + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + return formatResult(runGit(root, cmd), { label: "show" }); + }, + + blame(args, root) { + // `git blame` does not accept `--no-color`; pass color config via -c instead. 
+ const cmd = ["-c", "color.ui=false", "blame"]; + const range = ensureValidBlameRange(args?.range); + if (range) cmd.push("-L", range); + cmd.push(ensureValidPath(args?.path, root)); + return formatResult(runGit(root, cmd), { label: "blame" }); + }, + + status(args, root) { + const cmd = ["status"]; + if (args?.porcelain === true) cmd.push("--porcelain"); + return formatResult(runGit(root, cmd), { label: "status" }); + }, + + grep(args, root) { + const cmd = ["grep", "-n", "--no-color"]; + if (args?.ignoreCase === true) cmd.push("-i"); + if (args?.word === true) cmd.push("-w"); + const pattern = args?.pattern; + if (typeof pattern !== "string" || !pattern) { + throw new Error("pattern must be a non-empty string"); + } + if (pattern.length > 1024) { + throw new Error("pattern too long (max 1024 chars)"); + } + if (/[\0\n\r]/.test(pattern)) { + throw new Error("pattern contains NUL/newline characters"); + } + cmd.push("-e", pattern); + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + const limit = ensureBoundedInt(args?.limit, { min: 1, max: 5000, label: "limit" }) ?? LIMIT_DEFAULTS.grep; + return formatResult(runGit(root, cmd), { truncateLines: limit, label: "grep" }); + }, + + ls_files(args, root) { + const cmd = ["ls-files"]; + const paths = ensureValidPathList(args?.paths, root); + if (paths.length > 0) cmd.push("--", ...paths); + return formatResult(runGit(root, cmd), { label: "ls-files" }); + }, +}; + +// --------------------------------------------------------------------------- +// JSON-RPC dispatch +// --------------------------------------------------------------------------- + +export function handleMcpRequest(request, root) { + if (!isPlainObject(request)) { + return { + jsonrpc: "2.0", + id: null, + error: { code: -32600, message: "Invalid Request" }, + }; + } + const { method, id, params } = request; + const respondError = (code, message) => ({ + jsonrpc: "2.0", + id: id ?? 
null, + error: { code, message }, + }); + + if (method === "initialize") { + return { + jsonrpc: "2.0", + id, + result: { + protocolVersion: MCP_PROTOCOL_VERSION, + capabilities: { tools: {} }, + serverInfo: { + name: MCP_SERVER_NAME, + version: MCP_SERVER_VERSION, + }, + }, + }; + } + + if (method === "initialized" || method === "notifications/initialized") { + return null; + } + + if (method === "ping") { + return { jsonrpc: "2.0", id, result: {} }; + } + + if (method === "tools/list") { + return { + jsonrpc: "2.0", + id, + result: { tools: TOOL_DEFINITIONS }, + }; + } + + if (method === "tools/call") { + const name = params?.name; + const args = params?.arguments ?? {}; + const handler = HANDLERS[name]; + if (!handler) { + return respondError(-32601, `Unknown tool: ${name}`); + } + try { + const { text, isError } = handler(args, root); + return { + jsonrpc: "2.0", + id, + result: { + content: [{ type: "text", text: text || "(empty)" }], + isError: Boolean(isError), + }, + }; + } catch (err) { + return { + jsonrpc: "2.0", + id, + result: { + content: [{ type: "text", text: `Error: ${err.message}` }], + isError: true, + }, + }; + } + } + + return respondError(-32601, `Method not found: ${method}`); +} + +// --------------------------------------------------------------------------- +// Server entrypoint +// --------------------------------------------------------------------------- + +export async function runMcpGitServer({ + env = process.env, + stdin = process.stdin, + stdout = process.stdout, + stderr = process.stderr, +} = {}) { + const rawRoot = env.CC_GIT_ROOT; + if (!rawRoot) { + stderr.write("[mcp-git] CC_GIT_ROOT env var is required\n"); + return 2; + } + const root = path.resolve(rawRoot); + if (!fs.existsSync(root) || !fs.statSync(root).isDirectory()) { + stderr.write(`[mcp-git] CC_GIT_ROOT not found or not a directory: ${root}\n`); + return 2; + } + + let buffer = ""; + stdin.setEncoding("utf8"); + + const writeResponse = (response) => { + if (response 
== null) return; + stdout.write(`${JSON.stringify(response)}\n`); + }; + + return await new Promise((resolve) => { + stdin.on("data", (chunk) => { + buffer += chunk; + if (buffer.length > MAX_STDIN_BUFFER_BYTES) { + // A well-behaved MCP client sends newline-delimited JSON. If we have grown + // past the cap without a newline, drop the oversized prefix so memory is + // bounded — subsequent data may still contain valid frames. + const lastNewline = buffer.lastIndexOf("\n"); + if (lastNewline === -1) { + buffer = buffer.slice(-MAX_STDIN_BUFFER_BYTES); + } else { + buffer = buffer.slice(lastNewline + 1); + } + } + let newlineIndex; + while ((newlineIndex = buffer.indexOf("\n")) !== -1) { + const line = buffer.slice(0, newlineIndex).trim(); + buffer = buffer.slice(newlineIndex + 1); + if (!line) continue; + let request; + try { + request = JSON.parse(line); + } catch { + writeResponse({ + jsonrpc: "2.0", + id: null, + error: { code: -32700, message: "Parse error" }, + }); + continue; + } + const response = handleMcpRequest(request, root); + if (!response) { + continue; + } + // Error responses (including -32600 for non-object payloads) must always + // be emitted, even when the request has no `id`; otherwise the caller has + // no signal that the framing was wrong. Result responses only flow back + // when the request had an id (i.e., was a request, not a notification). + const hasRequestId = + isPlainObject(request) && + request.id !== undefined && + request.id !== null; + if (response.error || hasRequestId) { + writeResponse(response); + } + } + }); + + stdin.on("end", () => { + resolve(0); + }); + }); +} diff --git a/scripts/lib/review-worktree.mjs b/scripts/lib/review-worktree.mjs new file mode 100644 index 0000000..b82792e --- /dev/null +++ b/scripts/lib/review-worktree.mjs @@ -0,0 +1,187 @@ +/** + * Copyright 2026 Sendbird, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ +/** + * Ephemeral git worktree lifecycle for review/adversarial-review runs. 
+ * + * A review is run inside a throwaway `git worktree` so that any side effect that + * slips past the allowlist (a stray `Bash` injection, a tool that writes outside + * `/tmp`, etc.) is contained and disappears on cleanup. The worktree shares the + * primary repo's object database, so creating one is cheap. + */ + +import { spawnSync } from "node:child_process"; +import { randomBytes } from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; +import { resolvePluginRuntimeRoot } from "./codex-paths.mjs"; + +const WORKTREE_DIR_NAME = "review-worktrees"; + +function runGit(cwd, args, options = {}) { + return spawnSync("git", args, { + cwd, + encoding: "utf8", + timeout: 30_000, + env: { + ...process.env, + GIT_TERMINAL_PROMPT: "0", + // Use git's own no-op editor so review never spawns a real editor. + // `:` is a builtin in both POSIX shells and git's exec resolution. + GIT_EDITOR: ":", + }, + ...options, + }); +} + +function resolveWorktreesRoot() { + const root = path.join(resolvePluginRuntimeRoot(), WORKTREE_DIR_NAME); + fs.mkdirSync(root, { recursive: true, mode: 0o700 }); + return root; +} + +function makeWorktreePath(label) { + const slug = label && /^[A-Za-z0-9._-]+$/.test(label) ? label : "review"; + return path.join( + resolveWorktreesRoot(), + `${slug}-${process.pid}-${Date.now().toString(36)}-${randomBytes(4).toString("hex")}` + ); +} + +/** + * Resolve the canonical git ref to materialize for a review. We default to HEAD + * (the branch tip); callers may pass a specific ref (branch review) or "HEAD" to + * be explicit. + */ +export function resolveBaseRef(repoRoot, requestedRef = "HEAD") { + const ref = requestedRef && /^[A-Za-z0-9._/\-~^@]+$/.test(requestedRef) + ? requestedRef + : "HEAD"; + const result = runGit(repoRoot, ["rev-parse", "--verify", `${ref}^{commit}`]); + if (result.status !== 0) { + throw new Error( + `git rev-parse ${ref} failed: ${(result.stderr ?? 
"").trim() || "unknown error"}` + ); + } + return result.stdout.trim(); +} + +/** + * Create an ephemeral worktree at the resolved ref. Returns `{ path, cleanup }`. + * The caller MUST invoke `cleanup()` (typically in a `finally` block) so that + * the worktree is removed even if the review fails. + */ +export function createReviewWorktree(repoRoot, { label = "review", ref = "HEAD" } = {}) { + const commit = resolveBaseRef(repoRoot, ref); + const worktreePath = makeWorktreePath(label); + + // `git worktree add --detach ` performs the checkout in one step; + // splitting it into add+checkout is unnecessary and slower. + const created = runGit(repoRoot, [ + "worktree", + "add", + "--detach", + worktreePath, + commit, + ]); + if (created.status !== 0) { + // `git worktree add` may have partially registered a `.git/worktrees/` + // bookkeeping entry before failing on checkout or ENOSPC. Remove the + // filesystem directory and then prune so the partial entry is reclaimed. + cleanupWorktreeDir(worktreePath); + runGit(repoRoot, ["worktree", "prune"]); + throw new Error( + `git worktree add failed: ${(created.stderr ?? "").trim() || "unknown error"}` + ); + } + + let removed = false; + const cleanup = () => { + if (removed) return; + removed = true; + safeRemoveWorktree(repoRoot, worktreePath); + }; + + return { path: worktreePath, commit, cleanup }; +} + +function safeRemoveWorktree(repoRoot, worktreePath) { + // Best-effort: ask git to remove, then ensure the directory is gone. + runGit(repoRoot, ["worktree", "remove", "--force", worktreePath]); + cleanupWorktreeDir(worktreePath); + // `git worktree prune` cleans up the bookkeeping under .git/worktrees so that + // a future cleanup pass does not see a dangling entry pointing at a deleted dir. 
+ runGit(repoRoot, ["worktree", "prune"]); +} + +function cleanupWorktreeDir(worktreePath) { + try { + fs.rmSync(worktreePath, { recursive: true, force: true, maxRetries: 3 }); + } catch { + // Last resort — leave on disk rather than crash the review pipeline. + } +} + +/** + * Choose how to expose the repository to a review run. + * + * For branch reviews we run inside an ephemeral worktree checked out at the + * branch tip: that gives us mutation isolation without losing any of the + * commits Claude needs to inspect. + * + * For working-tree reviews we deliberately *do not* create a worktree. A + * worktree-from-HEAD would hide the very staged/unstaged/untracked changes + * that the reviewer is supposed to inspect — `git status` would report clean, + * `git diff` would show nothing, and the MCP server pointed at the worktree + * would mislead Claude into thinking the repo is unchanged. Instead we run in + * the original repo and rely on the Bash-free allowlist for containment. + * + * Returns `{ cwd, gitRoot, cleanup }`. `gitRoot` is the path the MCP git server + * should be rooted at; `cwd` is what the Claude CLI should treat as the + * working directory. + */ +export function createReviewIsolation(repoRoot, target, { label = "review" } = {}) { + if (target?.mode === "working-tree") { + return { + cwd: repoRoot, + gitRoot: repoRoot, + cleanup: () => {}, + isolated: false, + }; + } + const worktree = createReviewWorktree(repoRoot, { label }); + return { + cwd: worktree.path, + gitRoot: worktree.path, + cleanup: () => worktree.cleanup(), + isolated: true, + }; +} + +/** + * Sweep stale worktrees left behind by crashes. Called on every review start + * so that long-running plugin installs do not accumulate dead worktrees. 
+ */
+export function pruneStaleReviewWorktrees(repoRoot, { maxAgeMs = 6 * 60 * 60 * 1000 } = {}) {
+  const root = resolveWorktreesRoot();
+  let entries;
+  try {
+    entries = fs.readdirSync(root, { withFileTypes: true });
+  } catch {
+    return;
+  }
+  const now = Date.now();
+  for (const entry of entries) {
+    if (!entry.isDirectory()) continue;
+    const full = path.join(root, entry.name);
+    let stat;
+    try {
+      stat = fs.statSync(full);
+    } catch {
+      continue;
+    }
+    if (now - stat.mtimeMs < maxAgeMs) continue;
+    safeRemoveWorktree(repoRoot, full);
+  }
+}
diff --git a/skills/adversarial-review/SKILL.md b/skills/adversarial-review/SKILL.md
index bfe3f12..e1b22cb 100644
--- a/skills/adversarial-review/SKILL.md
+++ b/skills/adversarial-review/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: adversarial-review
-description: 'Run a design-challenging Claude Code review of local git changes in this repository. Args: --wait, --background, --base <ref>, --scope <scope>, --model <model>, [focus text]. Use only when the user wants stronger scrutiny than a normal review, such as explicit tradeoff challenge, risky-change review, or custom focus text.'
+description: 'Run a design-challenging Claude Code review of local git changes in this repository. Args: --wait, --background, --base <ref>, --scope <scope>, --model <model>, --effort <level>, [focus text]. Defaults to opus + xhigh effort. Use only when the user wants stronger scrutiny than a normal review, such as explicit tradeoff challenge, risky-change review, or custom focus text.'
 ---
 
 # Claude Code Adversarial Review
 
@@ -16,7 +16,7 @@ Unlike `$cc:review`, this skill accepts custom focus text after the flags. The m
Do not derive the companion path from this skill file or any cache directory. 
Always run the installed copy: `node "<plugin-root>/scripts/claude-companion.mjs" adversarial-review ...`
-Supported arguments: `--wait`, `--background`, `--base <ref>`, `--scope auto|working-tree|branch`, `--model <model>`, plus optional focus text after the flags
+Supported arguments: `--wait`, `--background`, `--base <ref>`, `--scope auto|working-tree|branch`, `--model <model>`, `--effort <level>`, plus optional focus text after the flags (defaults: model=opus, effort=xhigh; sonnet defaults to high; haiku has no effort)
 
 Raw slash-command arguments: `$ARGUMENTS`
diff --git a/skills/rescue/SKILL.md b/skills/rescue/SKILL.md
index cd160ca..c8b1c5b 100644
--- a/skills/rescue/SKILL.md
+++ b/skills/rescue/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: rescue
-description: 'Delegate a substantial diagnosis, implementation, or follow-up task to Claude Code through the tracked-job runtime. Args: --background, --wait, --resume, --resume-last, --fresh, --write, --model <model>, --effort <level>, --prompt-file <path>, [task text]. Use when Claude should investigate or change things, not when the user only wants review findings.'
+description: 'Delegate a substantial diagnosis, implementation, or follow-up task to Claude Code through the tracked-job runtime. Args: --background, --wait, --resume, --resume-last, --fresh, --write, --model <model>, --effort <level>, --prompt-file <path>, [task text]. Defaults to opus + xhigh effort. Use when Claude should investigate or change things, not when the user only wants review findings.'
 ---
 
 # Claude Code Rescue
 
@@ -22,7 +22,7 @@ Do not derive the companion path from this skill file or any cache directory. 
Al Raw slash-command arguments: `$ARGUMENTS`
 
-Supported arguments: `--background`, `--wait`, `--resume`, `--resume-last`, `--fresh`, `--write`, `--model `, `--effort `, `--prompt-file `, plus free-text task text
+Supported arguments: `--background`, `--wait`, `--resume`, `--resume-last`, `--fresh`, `--write`, `--model `, `--effort `, `--prompt-file `, plus free-text task text
 
 Main-thread routing rules:
 - If the user explicitly invoked `$cc:rescue` or `Claude Code Rescue`, do not keep the work in the main Codex thread. Delegate it.
diff --git a/skills/review/SKILL.md b/skills/review/SKILL.md
index 0350478..a2d05b1 100644
--- a/skills/review/SKILL.md
+++ b/skills/review/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: review
-description: 'Run a standard Claude Code review of local git changes in this repository. Args: --wait, --background, --base <ref>, --scope <scope>, --model <model>. Use as the default path for ordinary code-review requests when the user did not explicitly ask for stronger adversarial scrutiny or for Claude to own the implementation work.'
+description: 'Run a standard Claude Code review of local git changes in this repository. Args: --wait, --background, --base <ref>, --scope <scope>, --model <model>, --effort <level>. Defaults to opus + xhigh effort. Use as the default path for ordinary code-review requests when the user did not explicitly ask for stronger adversarial scrutiny or for Claude to own the implementation work.'
 ---
 
 # Claude Code Review
 
@@ -16,7 +16,7 @@ If the overall request is "you review it too, also ask Claude to review in the b
Do not derive the companion path from this skill file or any cache directory. 
Always run the installed copy: `node "/scripts/claude-companion.mjs" review ...` -Supported arguments: `--wait`, `--background`, `--base `, `--scope auto|working-tree|branch`, `--model ` +Supported arguments: `--wait`, `--background`, `--base `, `--scope auto|working-tree|branch`, `--model `, `--effort ` (defaults: model=opus, effort=xhigh; sonnet defaults to high; haiku has no effort) Raw slash-command arguments: `$ARGUMENTS` diff --git a/tests/claude-cli.test.mjs b/tests/claude-cli.test.mjs index 5aea6f1..1063669 100644 --- a/tests/claude-cli.test.mjs +++ b/tests/claude-cli.test.mjs @@ -10,10 +10,14 @@ import { validateTurnCompletion, resolveModel, resolveEffort, + resolveDefaultModel, + resolveDefaultEffort, buildArgs, MODEL_ALIASES, EFFORT_ALIASES, VALID_EFFORTS, + DEFAULT_MODEL, + DEFAULT_EFFORT_BY_MODEL, SANDBOX_READ_ONLY_BASH_TOOLS, SANDBOX_READ_ONLY_TOOLS, SANDBOX_TEMP_DIR, @@ -423,8 +427,8 @@ describe("validateTurnCompletion", () => { // =========================================================================== describe("resolveModel", () => { - it("maps 'sonnet' to 'claude-sonnet-4-6'", () => { - assert.equal(resolveModel("sonnet"), "claude-sonnet-4-6"); + it("maps 'sonnet' to the 1M variant 'claude-sonnet-4-6[1m]'", () => { + assert.equal(resolveModel("sonnet"), "claude-sonnet-4-6[1m]"); }); it("maps 'haiku' to 'claude-haiku-4-5'", () => { @@ -444,13 +448,87 @@ describe("resolveModel", () => { assert.equal(resolveModel(""), undefined); }); + it("maps 'opus' to the 1M variant 'claude-opus-4-7[1m]'", () => { + assert.equal(resolveModel("opus"), "claude-opus-4-7[1m]"); + }); + it("MODEL_ALIASES map has expected entries", () => { - assert.equal(MODEL_ALIASES.size, 2); + assert.equal(MODEL_ALIASES.size, 3); + assert.ok(MODEL_ALIASES.has("opus")); assert.ok(MODEL_ALIASES.has("sonnet")); assert.ok(MODEL_ALIASES.has("haiku")); }); }); +// =========================================================================== +// resolveDefaultModel / resolveDefaultEffort 
+// =========================================================================== + +describe("resolveDefaultModel", () => { + it("returns 'opus' when model is null/undefined/empty", () => { + assert.equal(resolveDefaultModel(null), "opus"); + assert.equal(resolveDefaultModel(undefined), "opus"); + assert.equal(resolveDefaultModel(""), "opus"); + assert.equal(resolveDefaultModel(" "), "opus"); + }); + + it("passes through an explicit model", () => { + assert.equal(resolveDefaultModel("sonnet"), "sonnet"); + assert.equal(resolveDefaultModel("haiku"), "haiku"); + assert.equal(resolveDefaultModel("claude-opus-4-7"), "claude-opus-4-7"); + }); + + it("exposes DEFAULT_MODEL constant as 'opus'", () => { + assert.equal(DEFAULT_MODEL, "opus"); + }); +}); + +describe("resolveDefaultEffort", () => { + it("defaults to xhigh for opus alias and resolved id (including 1M variant)", () => { + assert.equal(resolveDefaultEffort("opus", null), "xhigh"); + assert.equal(resolveDefaultEffort("claude-opus-4-7", null), "xhigh"); + assert.equal(resolveDefaultEffort("claude-opus-4-7[1m]", null), "xhigh"); + assert.equal(resolveDefaultEffort("OPUS", undefined), "xhigh"); + }); + + it("defaults to high for sonnet alias and resolved id (including 1M variant)", () => { + assert.equal(resolveDefaultEffort("sonnet", null), "high"); + assert.equal(resolveDefaultEffort("claude-sonnet-4-6", null), "high"); + assert.equal(resolveDefaultEffort("claude-sonnet-4-6[1m]", null), "high"); + }); + + it("returns undefined for haiku (no effort default)", () => { + assert.equal(resolveDefaultEffort("haiku", null), undefined); + assert.equal(resolveDefaultEffort("claude-haiku-4-5", undefined), undefined); + }); + + it("returns undefined for unknown model when effort not provided", () => { + assert.equal(resolveDefaultEffort("some-future-model", null), undefined); + }); + + it("preserves an explicit effort regardless of model", () => { + assert.equal(resolveDefaultEffort("opus", "low"), "low"); + 
assert.equal(resolveDefaultEffort("sonnet", "medium"), "medium"); + assert.equal(resolveDefaultEffort("haiku", "high"), "high"); + assert.equal(resolveDefaultEffort(null, "max"), "max"); + }); + + it("treats blank effort as missing", () => { + assert.equal(resolveDefaultEffort("opus", ""), "xhigh"); + assert.equal(resolveDefaultEffort("opus", " "), "xhigh"); + }); + + it("DEFAULT_EFFORT_BY_MODEL contains the expected entries", () => { + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("opus"), "xhigh"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("claude-opus-4-7"), "xhigh"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("claude-opus-4-7[1m]"), "xhigh"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("sonnet"), "high"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("claude-sonnet-4-6"), "high"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.get("claude-sonnet-4-6[1m]"), "high"); + assert.equal(DEFAULT_EFFORT_BY_MODEL.has("haiku"), false); + }); +}); + // =========================================================================== // resolveEffort // =========================================================================== @@ -476,8 +554,8 @@ describe("resolveEffort", () => { assert.equal(resolveEffort("high"), "high"); }); - it("maps 'xhigh' to 'max'", () => { - assert.equal(resolveEffort("xhigh"), "max"); + it("passes 'xhigh' through as a first-class effort", () => { + assert.equal(resolveEffort("xhigh"), "xhigh"); }); it("maps 'max' to 'max'", () => { @@ -486,6 +564,7 @@ describe("resolveEffort", () => { it("normalizes canonical effort values to lowercase", () => { assert.equal(resolveEffort("HIGH"), "high"); + assert.equal(resolveEffort("XHIGH"), "xhigh"); }); it("throws on unsupported effort values", () => { @@ -500,19 +579,19 @@ describe("resolveEffort", () => { assert.equal(resolveEffort(undefined), undefined); }); - it("VALID_EFFORTS contains low, medium, high, max", () => { + it("VALID_EFFORTS contains low, medium, high, xhigh, max", () => { assert.ok(VALID_EFFORTS.has("low")); 
assert.ok(VALID_EFFORTS.has("medium")); assert.ok(VALID_EFFORTS.has("high")); + assert.ok(VALID_EFFORTS.has("xhigh")); assert.ok(VALID_EFFORTS.has("max")); - assert.equal(VALID_EFFORTS.size, 4); + assert.equal(VALID_EFFORTS.size, 5); }); it("EFFORT_ALIASES only contains legacy compatibility mappings", () => { assert.deepEqual(EFFORT_ALIASES, { none: "low", minimal: "low", - xhigh: "max", }); }); }); @@ -564,13 +643,20 @@ describe("buildArgs", () => { const args = buildArgs("p", { model: "sonnet" }); const idx = args.indexOf("--model"); assert.ok(idx >= 0); - assert.equal(args[idx + 1], "claude-sonnet-4-6"); + assert.equal(args[idx + 1], "claude-sonnet-4-6[1m]"); }); it("includes --effort with resolved effort", () => { const args = buildArgs("p", { effort: "xhigh" }); const idx = args.indexOf("--effort"); assert.ok(idx >= 0); + assert.equal(args[idx + 1], "xhigh"); + }); + + it("passes 'max' through as --effort max when explicitly requested", () => { + const args = buildArgs("p", { effort: "max" }); + const idx = args.indexOf("--effort"); + assert.ok(idx >= 0); assert.equal(args[idx + 1], "max"); }); @@ -669,11 +755,14 @@ describe("SANDBOX_SETTINGS", () => { assert.ok("workspace-write" in SANDBOX_SETTINGS); }); - it("read-only enables sandbox with allowWrite [SANDBOX_TEMP_DIR]", () => { + it("read-only enables sandbox with allowWrite [SANDBOX_TEMP_DIR] and unrestricted network", () => { const s = SANDBOX_SETTINGS["read-only"].sandbox; assert.equal(s.enabled, true); assert.deepEqual(s.filesystem.allowWrite, [SANDBOX_TEMP_DIR]); - assert.deepEqual(s.network.allowedDomains, []); + // network is intentionally omitted so that WebFetch/WebSearch and Claude's + // own API path remain reachable; review safety comes from removing Bash + // from the allowlist instead. 
+ assert.equal(s.network, undefined); }); it("workspace-write enables sandbox with allowWrite ['.', SANDBOX_TEMP_DIR]", () => { diff --git a/tests/mcp-git.test.mjs b/tests/mcp-git.test.mjs new file mode 100644 index 0000000..ed77f36 --- /dev/null +++ b/tests/mcp-git.test.mjs @@ -0,0 +1,376 @@ +/** + * Copyright 2026 Sendbird, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { + TOOL_DEFINITIONS, + MCP_PROTOCOL_VERSION, + MCP_SERVER_NAME, + handleMcpRequest, +} from "../scripts/lib/mcp-git.mjs"; + +// --------------------------------------------------------------------------- +// Fixture: a tiny git repo with two commits +// --------------------------------------------------------------------------- + +let repoRoot; + +function git(cwd, args) { + const result = spawnSync("git", args, { + cwd, + encoding: "utf8", + env: { + ...process.env, + GIT_AUTHOR_NAME: "Test", + GIT_AUTHOR_EMAIL: "test@example.com", + GIT_COMMITTER_NAME: "Test", + GIT_COMMITTER_EMAIL: "test@example.com", + GIT_TERMINAL_PROMPT: "0", + }, + }); + if (result.status !== 0) { + throw new Error( + `git ${args.join(" ")} failed: ${result.stderr ?? result.stdout ?? 
"unknown"}` + ); + } + return result; +} + +before(() => { + repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), "cc-mcp-git-test-")); + git(repoRoot, ["init", "-q", "-b", "main"]); + fs.writeFileSync(path.join(repoRoot, "hello.txt"), "hello\nworld\n"); + fs.writeFileSync(path.join(repoRoot, "guide.md"), "# Guide\nfirst line.\n"); + git(repoRoot, ["add", "."]); + git(repoRoot, ["commit", "-q", "-m", "initial commit"]); + fs.writeFileSync(path.join(repoRoot, "hello.txt"), "hello\nworld\nadded line\n"); + git(repoRoot, ["add", "."]); + git(repoRoot, ["commit", "-q", "-m", "tweak hello"]); +}); + +after(() => { + if (repoRoot) { + fs.rmSync(repoRoot, { recursive: true, force: true }); + } +}); + +// --------------------------------------------------------------------------- +// JSON-RPC dispatch +// --------------------------------------------------------------------------- + +describe("handleMcpRequest — initialize / tools/list", () => { + it("responds to initialize with server info + capabilities", () => { + const res = handleMcpRequest( + { jsonrpc: "2.0", id: 1, method: "initialize", params: {} }, + "/tmp" + ); + assert.equal(res.id, 1); + assert.equal(res.result.protocolVersion, MCP_PROTOCOL_VERSION); + assert.equal(res.result.serverInfo.name, MCP_SERVER_NAME); + assert.ok(res.result.capabilities.tools !== undefined); + }); + + it("lists all expected tools", () => { + const res = handleMcpRequest( + { jsonrpc: "2.0", id: 2, method: "tools/list" }, + "/tmp" + ); + const names = res.result.tools.map((t) => t.name).sort(); + assert.deepEqual(names, [ + "blame", + "diff", + "grep", + "log", + "ls_files", + "show", + "status", + ]); + for (const tool of res.result.tools) { + assert.equal(typeof tool.description, "string"); + assert.equal(tool.inputSchema.type, "object"); + } + }); + + it("returns method-not-found for unknown methods", () => { + const res = handleMcpRequest( + { jsonrpc: "2.0", id: 3, method: "wat/bogus" }, + "/tmp" + ); + assert.equal(res.error.code, 
-32601); + }); + + it("returns null (no response) for the initialized notification", () => { + const res = handleMcpRequest( + { jsonrpc: "2.0", method: "notifications/initialized" }, + "/tmp" + ); + assert.equal(res, null); + }); + + it("returns Invalid Request (-32600) when payload is not a JSON object", () => { + for (const bogus of [null, [], "string", 42, true]) { + const res = handleMcpRequest(bogus, "/tmp"); + assert.equal(res.error.code, -32600); + assert.equal(res.id, null); + } + }); +}); + +// --------------------------------------------------------------------------- +// Tool handlers (round-trip via handleMcpRequest) +// --------------------------------------------------------------------------- + +function callTool(name, args = {}) { + return handleMcpRequest( + { + jsonrpc: "2.0", + id: 99, + method: "tools/call", + params: { name, arguments: args }, + }, + repoRoot + ); +} + +describe("tool: status", () => { + it("returns clean working tree text", () => { + const res = callTool("status", {}); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /clean|nothing to commit/i); + }); + + it("returns porcelain output", () => { + fs.writeFileSync(path.join(repoRoot, "untracked.txt"), "x"); + try { + const res = callTool("status", { porcelain: true }); + assert.match(res.result.content[0].text, /\?\? 
untracked\.txt/); + } finally { + fs.unlinkSync(path.join(repoRoot, "untracked.txt")); + } + }); +}); + +describe("tool: log", () => { + it("returns oneline log limited by limit", () => { + const res = callTool("log", { limit: 1, format: "oneline" }); + assert.equal(res.result.isError, false); + const lines = res.result.content[0].text.trim().split("\n"); + assert.equal(lines.length, 1); + }); + + it("rejects unknown formats", () => { + const res = callTool("log", { format: "fancy" }); + assert.equal(res.result.isError, true); + assert.match(res.result.content[0].text, /format must be/); + }); + + it("rejects refs with shell metacharacters", () => { + const res = callTool("log", { refs: "HEAD; rm -rf /" }); + assert.equal(res.result.isError, true); + assert.match(res.result.content[0].text, /refs/); + }); + + it("rejects refs that start with '-' (would be interpreted as a git flag)", () => { + for (const ref of ["-p", "--ext-diff", "--help", "-"]) { + const res = callTool("log", { refs: ref }); + assert.equal(res.result.isError, true, `should reject ${ref}`); + assert.match(res.result.content[0].text, /flag|-/); + } + }); + + it("accepts dotted ref ranges", () => { + const res = callTool("log", { refs: "HEAD~1..HEAD", limit: 5 }); + assert.equal(res.result.isError, false); + }); +}); + +describe("tool: diff", () => { + it("returns diff between HEAD~1 and HEAD", () => { + const res = callTool("diff", { refs: "HEAD~1..HEAD" }); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /\+added line/); + }); + + it("stat: true returns --stat summary", () => { + const res = callTool("diff", { refs: "HEAD~1..HEAD", stat: true }); + assert.match(res.result.content[0].text, /hello\.txt/); + assert.doesNotMatch(res.result.content[0].text, /\+added line/); + }); + + it("rejects paths that escape the git root", () => { + const res = callTool("diff", { paths: ["../escape"] }); + assert.equal(res.result.isError, true); + 
assert.match(res.result.content[0].text, /escape|forbidden/i); + }); + + it("rejects paths that look like flags", () => { + const res = callTool("diff", { paths: ["--exec=evil"] }); + assert.equal(res.result.isError, true); + assert.match(res.result.content[0].text, /flag/i); + }); + + it("rejects paths with shell metacharacters", () => { + const res = callTool("diff", { paths: ["a;rm"] }); + assert.equal(res.result.isError, true); + }); +}); + +describe("tool: show", () => { + it("requires ref", () => { + const res = callTool("show", {}); + assert.equal(res.result.isError, true); + assert.match(res.result.content[0].text, /ref/); + }); + + it("shows HEAD content with stat", () => { + const res = callTool("show", { ref: "HEAD", stat: true }); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /hello\.txt/); + }); +}); + +describe("tool: grep", () => { + it("finds matches across tracked files", () => { + const res = callTool("grep", { pattern: "world" }); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /hello\.txt:\d+:world/); + }); + + it("requires a non-empty pattern", () => { + const res = callTool("grep", { pattern: "" }); + assert.equal(res.result.isError, true); + }); + + it("rejects null bytes in pattern", () => { + const res = callTool("grep", { pattern: `a${String.fromCharCode(0)}b` }); + assert.equal(res.result.isError, true); + }); +}); + +describe("tool: blame", () => { + it("requires path", () => { + const res = callTool("blame", {}); + assert.equal(res.result.isError, true); + }); + + it("returns blame output", () => { + const res = callTool("blame", { path: "hello.txt" }); + assert.equal(res.result.isError, false); + assert.match(res.result.content[0].text, /hello/); + }); + + it("accepts numeric range", () => { + const res = callTool("blame", { path: "hello.txt", range: "1,2" }); + assert.equal(res.result.isError, false); + }); + + it("rejects non-numeric range", () => { + const 
res = callTool("blame", { path: "hello.txt", range: "a,b" }); + assert.equal(res.result.isError, true); + }); +}); + +describe("tool: ls_files", () => { + it("lists tracked files in the worktree", () => { + const res = callTool("ls_files", {}); + assert.equal(res.result.isError, false); + const text = res.result.content[0].text; + assert.match(text, /hello\.txt/); + assert.match(text, /guide\.md/); + }); +}); + +// --------------------------------------------------------------------------- +// Tool registry shape +// --------------------------------------------------------------------------- + +describe("TOOL_DEFINITIONS", () => { + it("every tool has a name, description, and inputSchema", () => { + for (const t of TOOL_DEFINITIONS) { + assert.equal(typeof t.name, "string"); + assert.equal(typeof t.description, "string"); + assert.equal(t.inputSchema.type, "object"); + } + }); +}); + +// --------------------------------------------------------------------------- +// Stdio framing — runMcpGitServer end-to-end via the companion subcommand +// --------------------------------------------------------------------------- + +describe("runMcpGitServer stdio framing", () => { + const companion = path.join( + path.dirname(new URL(import.meta.url).pathname), + "..", + "scripts", + "claude-companion.mjs" + ); + + function runServer(inputLines, env = {}) { + const child = spawnSync(process.execPath, [companion, "mcp-git"], { + input: inputLines.map((line) => `${line}\n`).join(""), + encoding: "utf8", + env: { ...process.env, CC_GIT_ROOT: repoRoot, ...env }, + timeout: 10_000, + }); + return child; + } + + it("emits a response for a valid initialize request", () => { + const child = runServer([ + JSON.stringify({ jsonrpc: "2.0", id: 1, method: "initialize", params: {} }), + ]); + assert.equal(child.status, 0); + const lines = child.stdout.trim().split("\n").filter(Boolean); + assert.equal(lines.length, 1); + const res = JSON.parse(lines[0]); + assert.equal(res.id, 1); + 
assert.ok(res.result?.serverInfo); + }); + + it("emits an Invalid Request error even when the payload is not an object", () => { + const child = runServer(["[1,2,3]", "true", "\"hello\"", "42"]); + assert.equal(child.status, 0); + const lines = child.stdout.trim().split("\n").filter(Boolean); + // One -32600 response per malformed payload. + assert.equal(lines.length, 4); + for (const line of lines) { + const res = JSON.parse(line); + assert.equal(res.error.code, -32600); + assert.equal(res.id, null); + } + }); + + it("emits Parse error for non-JSON lines", () => { + const child = runServer(["this is not json"]); + assert.equal(child.status, 0); + const res = JSON.parse(child.stdout.trim()); + assert.equal(res.error.code, -32700); + }); + + it("does not respond to notifications (no id)", () => { + const child = runServer([ + JSON.stringify({ jsonrpc: "2.0", method: "notifications/initialized" }), + ]); + assert.equal(child.status, 0); + assert.equal(child.stdout.trim(), ""); + }); + + it("exits non-zero when CC_GIT_ROOT is missing", () => { + const child = spawnSync(process.execPath, [companion, "mcp-git"], { + input: "", + encoding: "utf8", + env: { ...process.env, CC_GIT_ROOT: "" }, + timeout: 5_000, + }); + assert.notEqual(child.status, 0); + assert.match(child.stderr, /CC_GIT_ROOT/); + }); +}); diff --git a/tests/review-worktree.test.mjs b/tests/review-worktree.test.mjs new file mode 100644 index 0000000..19a4fec --- /dev/null +++ b/tests/review-worktree.test.mjs @@ -0,0 +1,217 @@ +/** + * Copyright 2026 Sendbird, Inc. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, it, before, after } from "node:test"; +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { + createReviewIsolation, + createReviewWorktree, + pruneStaleReviewWorktrees, + resolveBaseRef, +} from "../scripts/lib/review-worktree.mjs"; + +let repoRoot; +let previousCodexHome; +let previousHome; +let previousUserProfile; +let tempHome; + +function git(cwd, args) { + const result = spawnSync("git", args, { + cwd, + encoding: "utf8", + env: { + ...process.env, + GIT_AUTHOR_NAME: "Test", + GIT_AUTHOR_EMAIL: "test@example.com", + GIT_COMMITTER_NAME: "Test", + GIT_COMMITTER_EMAIL: "test@example.com", + GIT_TERMINAL_PROMPT: "0", + }, + }); + if (result.status !== 0) { + throw new Error( + `git ${args.join(" ")} failed: ${result.stderr ?? result.stdout ?? "unknown"}` + ); + } + return result; +} + +before(() => { + // Sandbox the plugin runtime root so we don't touch the real ~/.codex. 
+ tempHome = fs.mkdtempSync(path.join(os.tmpdir(), "cc-worktree-home-")); + previousHome = process.env.HOME; + previousUserProfile = process.env.USERPROFILE; + previousCodexHome = process.env.CODEX_HOME; + process.env.HOME = tempHome; + process.env.USERPROFILE = tempHome; + process.env.CODEX_HOME = path.join(tempHome, ".codex"); + + repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), "cc-worktree-repo-")); + git(repoRoot, ["init", "-q", "-b", "main"]); + fs.writeFileSync(path.join(repoRoot, "file.txt"), "content\n"); + git(repoRoot, ["add", "."]); + git(repoRoot, ["commit", "-q", "-m", "initial"]); +}); + +after(() => { + if (previousHome == null) delete process.env.HOME; + else process.env.HOME = previousHome; + if (previousUserProfile == null) delete process.env.USERPROFILE; + else process.env.USERPROFILE = previousUserProfile; + if (previousCodexHome == null) delete process.env.CODEX_HOME; + else process.env.CODEX_HOME = previousCodexHome; + if (tempHome) fs.rmSync(tempHome, { recursive: true, force: true }); + if (repoRoot) fs.rmSync(repoRoot, { recursive: true, force: true }); +}); + +// --------------------------------------------------------------------------- + +describe("resolveBaseRef", () => { + it("returns a 40-char SHA for HEAD", () => { + const sha = resolveBaseRef(repoRoot, "HEAD"); + assert.match(sha, /^[a-f0-9]{40}$/); + }); + + it("falls back to HEAD when the requested ref contains forbidden chars", () => { + const sha = resolveBaseRef(repoRoot, "HEAD; rm -rf /"); + assert.match(sha, /^[a-f0-9]{40}$/); + }); + + it("throws when the ref does not exist", () => { + assert.throws( + () => resolveBaseRef(repoRoot, "nonexistent-ref-xyz"), + /git rev-parse/ + ); + }); +}); + +describe("createReviewWorktree", () => { + it("creates a worktree at HEAD with checked-out files", () => { + const wt = createReviewWorktree(repoRoot, { label: "unit" }); + try { + assert.ok(fs.existsSync(wt.path)); + assert.ok(fs.existsSync(path.join(wt.path, "file.txt"))); + 
assert.match(wt.commit, /^[a-f0-9]{40}$/); + } finally { + wt.cleanup(); + } + }); + + it("cleanup removes the worktree directory", () => { + const wt = createReviewWorktree(repoRoot); + assert.ok(fs.existsSync(wt.path)); + wt.cleanup(); + assert.equal(fs.existsSync(wt.path), false); + }); + + it("cleanup is idempotent", () => { + const wt = createReviewWorktree(repoRoot); + wt.cleanup(); + wt.cleanup(); // should not throw + assert.equal(fs.existsSync(wt.path), false); + }); + + it("uses a sanitized slug derived from the label", () => { + const wt = createReviewWorktree(repoRoot, { label: "review" }); + try { + assert.match(path.basename(wt.path), /^review-/); + } finally { + wt.cleanup(); + } + }); + + it("falls back to a safe slug when label contains illegal characters", () => { + const wt = createReviewWorktree(repoRoot, { label: "rev iew/../bad" }); + try { + assert.match(path.basename(wt.path), /^review-/); + } finally { + wt.cleanup(); + } + }); + + it("each invocation creates a unique path", () => { + const a = createReviewWorktree(repoRoot, { label: "a" }); + const b = createReviewWorktree(repoRoot, { label: "a" }); + try { + assert.notEqual(a.path, b.path); + } finally { + a.cleanup(); + b.cleanup(); + } + }); +}); + +describe("createReviewIsolation", () => { + it("returns the original repoRoot for working-tree mode (no worktree)", () => { + const iso = createReviewIsolation(repoRoot, { mode: "working-tree", label: "wt" }); + try { + assert.equal(iso.cwd, repoRoot); + assert.equal(iso.gitRoot, repoRoot); + assert.equal(iso.isolated, false); + } finally { + iso.cleanup(); + } + }); + + it("does not create a worktree directory for working-tree mode", () => { + const root = path.join( + process.env.CODEX_HOME, + "plugins", "data", "cc", "runtime", "review-worktrees" + ); + const countEntries = () => { + try { + return fs.readdirSync(root, { withFileTypes: true }).length; + } catch { + return 0; + } + }; + const before = countEntries(); + const iso = 
createReviewIsolation(repoRoot, { mode: "working-tree" }); + const after = countEntries(); + assert.equal(after, before); + iso.cleanup(); + }); + + it("creates and cleans up an ephemeral worktree for branch mode", () => { + const iso = createReviewIsolation(repoRoot, { mode: "branch" }, { label: "iso-branch" }); + try { + assert.notEqual(iso.cwd, repoRoot); + assert.equal(iso.gitRoot, iso.cwd); + assert.equal(iso.isolated, true); + assert.ok(fs.existsSync(iso.cwd)); + } finally { + iso.cleanup(); + assert.equal(fs.existsSync(iso.cwd), false); + } + }); +}); + +describe("pruneStaleReviewWorktrees", () => { + it("removes worktree dirs older than the threshold", () => { + const wt = createReviewWorktree(repoRoot, { label: "stale" }); + // Don't call wt.cleanup() — simulate a crashed run. + // Force the directory's mtime backward by 7 hours. + const oldTime = (Date.now() - 7 * 60 * 60 * 1000) / 1000; + fs.utimesSync(wt.path, oldTime, oldTime); + assert.ok(fs.existsSync(wt.path)); + pruneStaleReviewWorktrees(repoRoot, { maxAgeMs: 6 * 60 * 60 * 1000 }); + assert.equal(fs.existsSync(wt.path), false); + }); + + it("leaves fresh worktree dirs alone", () => { + const wt = createReviewWorktree(repoRoot, { label: "fresh" }); + try { + pruneStaleReviewWorktrees(repoRoot, { maxAgeMs: 6 * 60 * 60 * 1000 }); + assert.ok(fs.existsSync(wt.path)); + } finally { + wt.cleanup(); + } + }); +}); diff --git a/tests/sandbox-modes.test.mjs b/tests/sandbox-modes.test.mjs index 6b44a25..12ddf23 100644 --- a/tests/sandbox-modes.test.mjs +++ b/tests/sandbox-modes.test.mjs @@ -12,10 +12,16 @@ import { buildArgs, SANDBOX_READ_ONLY_BASH_TOOLS, SANDBOX_READ_ONLY_TOOLS, + SANDBOX_REVIEW_TOOLS, SANDBOX_TEMP_DIR, SANDBOX_SETTINGS, + REVIEW_MCP_SERVER_NAME, + REVIEW_MCP_TOOL_NAMES, + REVIEW_MCP_ALLOWED_TOOLS, createSandboxSettings, cleanupSandboxSettings, + createReviewMcpConfig, + cleanupReviewMcpConfig, } from "../scripts/lib/claude-cli.mjs"; import { resolvePluginRuntimeRoot } from 
"../scripts/lib/codex-paths.mjs"; @@ -202,12 +208,15 @@ describe("sandbox settings lifecycle", () => { // --------------------------------------------------------------------------- describe("sandbox settings content", () => { - it("read-only: sandbox enabled, allowWrite temp dir only, no network", () => { + it("read-only: sandbox enabled, allowWrite temp dir only, network unrestricted", () => { const s = SANDBOX_SETTINGS["read-only"]; assert.equal(s.sandbox.enabled, true); - assert.equal(s.sandbox.autoAllowBashIfSandboxed, true); + assert.equal(s.sandbox.autoAllowBashIfSandboxed, false); assert.deepEqual(s.sandbox.filesystem.allowWrite, [SANDBOX_TEMP_DIR]); - assert.deepEqual(s.sandbox.network.allowedDomains, []); + // network block intentionally omitted: review/adversarial-review need network + // for WebFetch/WebSearch and the Claude CLI's own API access. Mutation + // surfaces are closed off by removing Bash from the allowlist instead. + assert.equal(s.sandbox.network, undefined); }); it("workspace-write: sandbox enabled, allowWrite cwd+temp dir, no network", () => { @@ -223,7 +232,9 @@ describe("sandbox settings content", () => { SANDBOX_SETTINGS["read-only"].sandbox.filesystem, SANDBOX_SETTINGS["workspace-write"].sandbox.filesystem ); - assert.deepEqual( + // read-only deliberately leaves network unset (allowed) while workspace-write + // explicitly closes outbound network from Bash. The two modes diverge here. + assert.notDeepEqual( SANDBOX_SETTINGS["read-only"].sandbox.network, SANDBOX_SETTINGS["workspace-write"].sandbox.network ); @@ -256,3 +267,102 @@ describe("mode consistency", () => { assert.equal(argsAllowedTools(args).length, 0); }); }); + +// --------------------------------------------------------------------------- +// 6. 
Review tool surface — MCP-only git access, no Bash
+// ---------------------------------------------------------------------------
+
+describe("SANDBOX_REVIEW_TOOLS", () => {
+  it("includes Read, Glob, Grep, WebSearch, WebFetch", () => {
+    for (const t of ["Read", "Glob", "Grep", "WebSearch", "WebFetch"]) {
+      assert.ok(SANDBOX_REVIEW_TOOLS.includes(t), `missing ${t}`);
+    }
+  });
+
+  it("does NOT include any Bash entry (Bash patterns are not strictly enforced)", () => {
+    for (const t of SANDBOX_REVIEW_TOOLS) {
+      assert.ok(
+        !/^Bash(\(|$)/.test(t),
+        `review allowlist must not contain Bash: ${t}`
+      );
+    }
+  });
+
+  it("does NOT include Write/Edit/MultiEdit/NotebookEdit/Task", () => {
+    for (const t of ["Write", "Edit", "MultiEdit", "NotebookEdit", "Task"]) {
+      assert.ok(!SANDBOX_REVIEW_TOOLS.includes(t), `${t} must not be allowed`);
+    }
+  });
+
+  it("exposes the bundled git MCP tools as mcp__<server>__<tool> entries", () => {
+    for (const name of REVIEW_MCP_TOOL_NAMES) {
+      const expected = `mcp__${REVIEW_MCP_SERVER_NAME}__${name}`;
+      assert.ok(
+        SANDBOX_REVIEW_TOOLS.includes(expected),
+        `missing MCP tool entry: ${expected}`
+      );
+      assert.ok(REVIEW_MCP_ALLOWED_TOOLS.includes(expected));
+    }
+  });
+
+  it("REVIEW_MCP_TOOL_NAMES covers diff, log, show, blame, status, grep, ls_files", () => {
+    for (const expected of ["diff", "log", "show", "blame", "status", "grep", "ls_files"]) {
+      assert.ok(REVIEW_MCP_TOOL_NAMES.includes(expected), `missing ${expected}`);
+    }
+  });
+});
+
+// ---------------------------------------------------------------------------
+// 7.
createReviewMcpConfig +// --------------------------------------------------------------------------- + +describe("createReviewMcpConfig", () => { + it("writes a JSON file that registers the gitReview MCP server", () => { + withTempCodexHome(() => { + const filePath = createReviewMcpConfig("/tmp/cc-review-fake-root"); + try { + const content = JSON.parse(fs.readFileSync(filePath, "utf8")); + assert.ok(content.mcpServers); + const server = content.mcpServers[REVIEW_MCP_SERVER_NAME]; + assert.ok(server, "gitReview server must be present"); + assert.equal(typeof server.command, "string"); + assert.ok(Array.isArray(server.args)); + assert.ok(server.args.some((a) => a.endsWith("claude-companion.mjs"))); + assert.ok(server.args.includes("mcp-git")); + assert.equal(server.env.CC_GIT_ROOT, "/tmp/cc-review-fake-root"); + } finally { + cleanupReviewMcpConfig(filePath); + } + }); + }); + + it("rejects missing/blank gitRoot", () => { + withTempCodexHome(() => { + assert.throws(() => createReviewMcpConfig(""), /gitRoot is required/); + assert.throws(() => createReviewMcpConfig(null), /gitRoot is required/); + assert.throws(() => createReviewMcpConfig(undefined), /gitRoot is required/); + }); + }); +}); + +// --------------------------------------------------------------------------- +// 8. 
buildArgs — review mode (mcpConfigFile + strictMcpConfig + new allowlist) +// --------------------------------------------------------------------------- + +describe("buildArgs review mode", () => { + it("emits --mcp-config when mcpConfigFile is provided", () => { + const args = buildArgs("p", { mcpConfigFile: "/tmp/mcp.json" }); + assert.ok(argsHas(args, "--mcp-config", "/tmp/mcp.json")); + }); + + it("emits --strict-mcp-config when strictMcpConfig is set", () => { + const args = buildArgs("p", { strictMcpConfig: true }); + assert.ok(args.includes("--strict-mcp-config")); + }); + + it("can express the review allowlist via --allowedTools per entry", () => { + const args = buildArgs("p", { allowedTools: SANDBOX_REVIEW_TOOLS }); + const allowed = argsAllowedTools(args); + assert.deepEqual([...allowed].sort(), [...SANDBOX_REVIEW_TOOLS].sort()); + }); +});