cockroachdb · fantapop · Mar 27, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -8,4 +8,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v6
-      - run: ./test.sh
+      - uses: actions/setup-go@v6
+        with:
+          go-version-file: autosolve/go.mod
+      - name: Run shell tests
+        run: ./test.sh
+      - name: Run Go tests
+        run: cd autosolve && go test ./... -count=1
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,6 +36,11 @@ Breaking changes are prefixed with "Breaking Change: ".
 - `autotag-from-changelog` now exposes `tag_created` and `tag` outputs so
   callers can react to whether a new tag was pushed.
 - `expect_step_output` test helper for asserting GitHub Actions step outputs.
+- `autosolve/assess` action: evaluate tasks for automated resolution suitability
+  using Claude in read-only mode.
+- `autosolve/implement` action: autonomously implement solutions, validate
+  security, push to fork, and create PRs using Claude. Includes AI security
+  review, token usage tracking, and per-file batched diff analysis.
 - `get-workflow-ref` action: resolve the ref a caller used to invoke a reusable
   workflow by parsing the caller's workflow file — no API calls or extra
   permissions needed.

diff --git a/autosolve/Makefile b/autosolve/Makefile
@@ -0,0 +1,11 @@
+.PHONY: build test clean
+
+# Local dev binary
+build:
+	go build -o autosolve ./cmd/autosolve
+
+# Run all Go tests in this module; -count=1 bypasses the test result cache
+test:
+	go test ./... -count=1
+
+# Remove the local dev binary produced by `build`
+clean:
+	rm -f autosolve
diff --git a/autosolve/assess/action.yml b/autosolve/assess/action.yml
@@ -0,0 +1,108 @@
+name: Autosolve Assess
+description: Run Claude in read-only mode to assess whether a task is suitable for automated resolution.
+
+inputs:
+  claude_cli_version:
+    description: "Claude CLI version to install (e.g. '2.1.79' or 'latest')."
+    required: false
+    default: "2.1.79"
+  system_prompt:
+    description: >
+      Trusted instructions for Claude describing the task to assess.
+      Do not embed untrusted user input (e.g., issue titles/bodies) here.
+      Pass user-supplied data via environment variables and list them in context_vars.
+    required: false
+    default: ""
+  skill:
+    description: Path to a skill/prompt file relative to the repo root.
+    required: false
+    default: ""
+  context_vars:
+    description: >
+      Comma-separated list of environment variable names to pass through to Claude.
+      Use this to provide untrusted user input (e.g., issue titles/bodies) safely.
+      Claude is automatically told which variables are available and instructed to
+      read them — you do not need to reference them in system_prompt.
+      Claude will only have access to these variables plus a baseline set of
+      system and authentication variables (PATH, HOME, etc.).
+    required: false
+    default: ""
+  assessment_criteria:
+    description: Custom criteria for the assessment. If not provided, uses default criteria.
+    required: false
+    default: ""
+  model:
+    description: Claude model ID.
+    required: false
+    default: "claude-opus-4-6"
+  blocked_paths:
+    description: >
+      Comma-separated path prefixes that cannot be modified.
+      .github/ is always blocked and cannot be removed.
+    required: false
+    default: ".github/workflows/"
+  verbose_logging:
+    description: >
+      Log full Claude output in collapsible groups in the step log.
+      Logs may contain source code snippets, environment variable
+      values, or other repository content quoted in Claude's responses.
+      Security review output is never logged regardless of this setting.
+    required: false
+    default: "false"
+  working_directory:
+    description: Directory to run in (relative to workspace root). Defaults to workspace root.
+    required: false
+    default: "."
+
+outputs:
+  assessment:
+    description: PROCEED or SKIP
+    value: ${{ steps.assess.outputs.assessment }}
+  summary:
+    description: Human-readable assessment reasoning.
+    value: ${{ steps.assess.outputs.summary }}
+  result:
+    description: Full Claude result text.
+    value: ${{ steps.assess.outputs.result }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Set up Claude CLI
+      shell: bash
+      run: |
+        if command -v roachdev >/dev/null; then
+          printf '#!/bin/sh\nexec roachdev claude -- "$@"\n' > /usr/local/bin/claude
+          chmod +x /usr/local/bin/claude
+          echo "Claude CLI: using roachdev wrapper"
+        else
+          curl --fail --silent --show-error --location https://claude.ai/install.sh | bash -s -- "$CLAUDE_CLI_VERSION"
+          echo "Claude CLI installed: $(claude --version)"
+        fi
+      env:
+        CLAUDE_CLI_VERSION: ${{ inputs.claude_cli_version }}
+
+    - name: Set up Go
+      uses: actions/setup-go@v6
+      with:
+        go-version-file: ${{ github.action_path }}/../go.mod
+        cache: false
+
+    - name: Build autosolve
+      shell: bash
+      run: go build -trimpath -o "$RUNNER_TEMP/autosolve" ./cmd/autosolve
+      working-directory: ${{ github.action_path }}/..
+
+    - name: Run assessment
+      id: assess
+      shell: bash
+      working-directory: ${{ inputs.working_directory }}
+      # Quote the binary path (as the build step does) so a RUNNER_TEMP that
+      # contains spaces is not word-split by bash.
+      run: '"$RUNNER_TEMP/autosolve" assess'
+      env:
+        INPUT_SYSTEM_PROMPT: ${{ inputs.system_prompt }}
+        INPUT_SKILL: ${{ inputs.skill }}
+        INPUT_CONTEXT_VARS: ${{ inputs.context_vars }}
+        INPUT_ASSESSMENT_CRITERIA: ${{ inputs.assessment_criteria }}
+        INPUT_MODEL: ${{ inputs.model }}
+        INPUT_BLOCKED_PATHS: ${{ inputs.blocked_paths }}
+        INPUT_VERBOSE_LOGGING: ${{ inputs.verbose_logging }}
diff --git a/autosolve/cmd/autosolve/main.go b/autosolve/cmd/autosolve/main.go
@@ -0,0 +1,98 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/signal"
+
+	"github.com/cockroachdb/actions/autosolve/internal/action"
+	"github.com/cockroachdb/actions/autosolve/internal/assess"
+	"github.com/cockroachdb/actions/autosolve/internal/claude"
+	"github.com/cockroachdb/actions/autosolve/internal/config"
+	"github.com/cockroachdb/actions/autosolve/internal/git"
+	"github.com/cockroachdb/actions/autosolve/internal/github"
+	"github.com/cockroachdb/actions/autosolve/internal/implement"
+)
+
+// usage is the help text printed to stderr by main when no command is given
+// or when the given command is not recognized.
+const usage = `Usage: autosolve <command>
+
+Commands:
+  assess      Run assessment phase
+  implement   Run implementation phase
+`
+
+func main() {
+	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
+	defer cancel()
+
+	if len(os.Args) < 2 {
+		fatalf(usage)
+	}
+
+	var err error
+	switch os.Args[1] {
+	case "assess":
+		err = runAssess(ctx)
+	case "implement":
+		err = runImplement(ctx)
+	default:
+		fatalf("unknown command: %s\n\n%s", os.Args[1], usage)
+	}
+
+	if err != nil {
+		action.LogError(err.Error())
+		os.Exit(1)
+	}
+}
+
+// fatalf writes the formatted message to stderr with a trailing newline and
+// then terminates the process with exit status 1. It does not return.
+func fatalf(format string, args ...any) {
+	line := format + "\n"
+	fmt.Fprintf(os.Stderr, line, args...)
+	os.Exit(1)
+}
+
+// runAssess implements the "assess" subcommand. In order, it loads the
+// assess configuration, validates authentication, and resolves the temp
+// directory, returning the first error unchanged. It then delegates to
+// assess.Run with a claude.CLIRunner.
+func runAssess(ctx context.Context) error {
+	assessCfg, err := config.LoadAssessConfig()
+	if err != nil {
+		return err
+	}
+
+	if err := config.ValidateAuth(); err != nil {
+		return err
+	}
+
+	workDir, err := ensureTmpDir()
+	if err != nil {
+		return err
+	}
+
+	runner := &claude.CLIRunner{}
+	return assess.Run(ctx, assessCfg, runner, workDir)
+}
+
+func runImplement(ctx context.Context) error {
+	cfg, err := config.LoadImplementConfig()
+	if err != nil {
+		return err
+	}
+	if err := config.ValidateAuth(); err != nil {
+		return err
+	}
+	tmpDir, err := ensureTmpDir()
+	if err != nil {
+		return err
+	}
+
+	gitClient := &git.CLIClient{}
+	ghClient := &github.GithubClient{Token: cfg.PRCreateToken}
+	return implement.Run(ctx, cfg, &claude.CLIRunner{}, ghClient, gitClient, tmpDir)
+}
+
+func ensureTmpDir() (string, error) {
+	dir := os.Getenv("AUTOSOLVE_TMPDIR")
+	if dir != "" {
+		return dir, nil
+	}
+	dir, err := os.MkdirTemp("", "autosolve_*")
+	if err != nil {
+		return "", fmt.Errorf("creating temp dir: %w", err)
+	}
+	os.Setenv("AUTOSOLVE_TMPDIR", dir)
+	return dir, nil
+}
diff --git a/autosolve/go.mod b/autosolve/go.mod
@@ -0,0 +1,3 @@
+module github.com/cockroachdb/actions/autosolve
+
+go 1.23.8
-go 1.23.8
+go 1.23
-go 1.23.8
+go 1.23
diff --git a/autosolve/go.sum b/autosolve/go.sum