From db4c9f36411d5bb357897763760bf250f934e375 Mon Sep 17 00:00:00 2001 From: Colby McHenry Date: Wed, 17 Jun 2026 14:18:22 -0500 Subject: [PATCH] feat(offload): reasoning offload for codegraph_explore (bring-your-own endpoint) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit codegraph_explore can now hand the source it retrieved to a reasoning model you point at — any OpenAI-compatible endpoint (Cerebras, OpenAI, a local vLLM/Ollama) with your own key — and return that model's tight, cited answer instead of the raw source dump. The agent's main context gets the answer in far fewer tokens, at the cost of one network round-trip. Off by default. Configure with `codegraph offload set-endpoint <url> --model <model> --key-env <ENV_VAR>` (or the CODEGRAPH_OFFLOAD_* env vars); status/disable manage it. The API key is never written to disk — the config stores the NAME of an env var and the key is read from it at call time. Strictly degradable: any failure (no endpoint, network, timeout, empty answer) returns null and the call falls back to the local source, so the offload can never surface an error to the agent. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 1 + README.md | 38 ++++++++ __tests__/offload.test.ts | 186 ++++++++++++++++++++++++++++++++++++ src/bin/codegraph.ts | 61 ++++++++++++ src/mcp/tools.ts | 12 +++ src/reasoning/config.ts | 127 +++++++++++++++++++++++++ src/reasoning/reasoner.ts | 194 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 619 insertions(+) create mode 100644 __tests__/offload.test.ts create mode 100644 src/reasoning/config.ts create mode 100644 src/reasoning/reasoner.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b1a325de..730af3ecb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### New Features +- Optional **reasoning offload** for `codegraph_explore` (off by default). 
Point CodeGraph at any OpenAI-compatible reasoning model you bring — Cerebras, OpenAI, a local vLLM or Ollama — and `codegraph_explore` hands the source it retrieved to that model and returns a tight, cited answer instead of a wall of source, so your agent's main context gets the answer in far fewer tokens. Turn it on with `codegraph offload set-endpoint <url> --model <model> --key-env <ENV_VAR>` (or the `CODEGRAPH_OFFLOAD_*` env vars), and `codegraph offload status` / `codegraph offload disable` manage it. Your API key is never written to disk (the config stores the *name* of the env var to read it from), nothing but the retrieved context and your question leaves your machine, and it silently falls back to normal local output on any error so it can never break a call. - Impact and blast-radius analysis for TypeScript, JavaScript, Go, Python, Rust, Ruby, C, Java, C#, PHP, Scala, Kotlin, Swift, Dart, and Pascal/Delphi now understands the readers of a constant. When you change a file-scope, package-level, module-level, or class-level constant — a config object, a lookup table, a shared constant — the other symbols in that file that read it now show up as affected, where before they were invisible (impact only followed calls, imports, and inheritance, so a constant's consumers looked like "nothing depends on this"). This makes `codegraph impact`, and the impact trail in `codegraph_explore`/`codegraph_node`, catch the "change this table, break its readers" class of change. It's on by default and adds no nodes to your graph; bundled/minified files and ambiguously-shadowed names are skipped to keep results precise. Set `CODEGRAPH_VALUE_REFS=0` to turn it off. - C file-scope constants and globals — `static const` scalars, pointer/array lookup tables, and shared mutable globals — are now recognized as symbols in their own right. 
They previously weren't extracted at all, so they never appeared in search or carried any dependents; now they show up in `codegraph search` and participate in impact analysis (see above), so changing a C lookup table surfaces the same-file functions that read it. - Java `static final` constants, C# `const` / `static readonly` constants, Scala `object` vals, and Kotlin top-level / `object` / `companion object` `val`s are now classified as constants rather than generic fields, so they participate in the constant-reader impact analysis above — change a `public static final` table, a `const string`, a Scala `object Config { val Timeout = … }`, or a Kotlin `companion object { const val … }` and the methods that read it now show up as affected. (Per-object Java `final` / C# `readonly` / Scala & Kotlin `class` instance properties are unchanged.) Kotlin constants were previously not indexed as their own symbols at all, so they now also appear in `codegraph search`. diff --git a/README.md b/README.md index 354af2463..c182f453f 100644 --- a/README.md +++ b/README.md @@ -606,6 +606,44 @@ add a negation — `!vendor/`. The defaults apply uniformly, so committing a dependency or build directory doesn't force it into the graph; the `.gitignore` negation is the explicit opt-in. +## Reasoning offload (bring your own model) + +**Optional, off by default.** Normally `codegraph_explore` returns the verbatim +source it retrieved and your agent reasons over it. With reasoning offload, that +source is instead handed to a reasoning model **you** point at, which returns a +tight, cited answer — so your agent's main context gets the answer, not a wall of +source. You trade one network round-trip for far fewer main-context tokens. + +Point it at **any** OpenAI-compatible endpoint with your own key — Cerebras, +OpenAI, a local vLLM or Ollama, anything. 
Nothing but the assembled context + your +question leaves your machine, and your API key is **never written to disk** (the +config stores the *name* of an env var; the key is read from it at call time). + +```bash +# Enable — URL ends in /v1; the key is read from the named env var at call time +codegraph offload set-endpoint https://api.cerebras.ai/v1 \ + --model gpt-oss-120b --key-env CEREBRAS_API_KEY + +codegraph offload status # show the current endpoint / model / key source +codegraph offload disable # turn it back off +``` + +Restart your editor/agent session afterward so running MCP servers pick it up. +Everything is also settable by env (these override the saved config — handy for +CI): `CODEGRAPH_OFFLOAD_URL`, `_MODEL`, `_KEY`, `_EFFORT` (`low`|`medium`|`high`), +`_STYLE` (`plain`|`report`). + +A few things worth knowing: + +- **Quality tracks the model you choose.** The synthesis prompt is correctness-first + (it leads with a `Coverage: full / partial / not found` verdict and cites + `file:line` for every claim, so answers stay verifiable), but a weak endpoint can + still be confidently wrong. It's designed and validated against `gpt-oss-120b`-class + models at low temperature. +- **It's strictly degradable.** Any failure — no endpoint, network error, timeout, + empty answer — silently falls back to returning the local source. The offload can + never break a call. + ## Telemetry CodeGraph collects **anonymous usage statistics** — which tools and commands get diff --git a/__tests__/offload.test.ts b/__tests__/offload.test.ts new file mode 100644 index 000000000..886d06dd8 --- /dev/null +++ b/__tests__/offload.test.ts @@ -0,0 +1,186 @@ +/** + * Reasoning offload — config resolution, persistence, and strict degradation. + * + * The offload sends explore's assembled source to a BYO OpenAI-compatible + * reasoning endpoint and returns the synthesized answer. Two invariants are + * load-bearing and covered here: + * 1. 
The API key is NEVER written to disk — the config stores only the NAME of + * an env var (`keyEnv`); the key is resolved at call time. + * 2. The path is STRICTLY DEGRADABLE — any failure (no endpoint, network error, + * non-2xx, empty body) returns null so the caller serves local source; it + * never throws and never surfaces an error to the agent. + */ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { + readOffloadConfig, + writeOffloadConfig, + resolveOffload, +} from '../src/reasoning/config'; +import { isOffloadEnabled, synthesizeOffload, stripAgentDirectives } from '../src/reasoning/reasoner'; + +describe('reasoning offload', () => { + let home: string; + + // Point ~/.codegraph at a throwaway dir (os.homedir() honors $HOME on POSIX, + // $USERPROFILE on Windows) + start from a clean env each test. + const HOME_ENV = ['HOME', 'USERPROFILE']; + const OFFLOAD_ENV = [ + 'CODEGRAPH_OFFLOAD_URL', 'CODEGRAPH_OFFLOAD_MODEL', 'CODEGRAPH_OFFLOAD_KEY', + 'CODEGRAPH_OFFLOAD_EFFORT', 'CODEGRAPH_OFFLOAD_STYLE', 'CODEGRAPH_OFFLOAD_TIMEOUT_MS', + 'CODEGRAPH_OFFLOAD_MAXTOKENS', 'CODEGRAPH_OFFLOAD_STRIP', 'CODEGRAPH_OFFLOAD_DEBUG', + 'CEREBRAS_API_KEY', + ]; + let saved: Record; + + beforeEach(() => { + home = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-offload-')); + saved = {}; + for (const k of [...HOME_ENV, ...OFFLOAD_ENV]) { saved[k] = process.env[k]; delete process.env[k]; } + process.env.HOME = home; + process.env.USERPROFILE = home; + }); + + afterEach(() => { + for (const k of [...HOME_ENV, ...OFFLOAD_ENV]) { + if (saved[k] === undefined) delete process.env[k]; + else process.env[k] = saved[k]; + } + vi.restoreAllMocks(); + if (fs.existsSync(home)) fs.rmSync(home, { recursive: true, force: true }); + }); + + describe('config persistence', () => { + it('is off, with sensible defaults, when nothing is configured', () => { + const c = 
resolveOffload(); + expect(c.enabled).toBe(false); + expect(c.origin).toBe('none'); + expect(c.model).toBe('gpt-oss-120b'); + expect(c.effort).toBe('low'); + expect(c.style).toBe('plain'); + expect(isOffloadEnabled()).toBe(false); + }); + + it('round-trips the config block and never writes the API key to disk', () => { + writeOffloadConfig({ url: 'https://api.cerebras.ai/v1', model: 'gpt-oss-120b', keyEnv: 'CEREBRAS_API_KEY' }); + expect(readOffloadConfig().url).toBe('https://api.cerebras.ai/v1'); + + const raw = fs.readFileSync(path.join(home, '.codegraph', 'config.json'), 'utf8'); + expect(raw).toContain('CEREBRAS_API_KEY'); // the env var NAME is stored + // ...but no actual secret material. Set a key and confirm it isn't on disk. + process.env.CEREBRAS_API_KEY = 'sk-super-secret-value'; + expect(fs.readFileSync(path.join(home, '.codegraph', 'config.json'), 'utf8')) + .not.toContain('sk-super-secret-value'); + }); + + it('resolves the API key from the configured env var at call time', () => { + writeOffloadConfig({ url: 'https://api.cerebras.ai/v1', keyEnv: 'CEREBRAS_API_KEY' }); + expect(resolveOffload().apiKey).toBeUndefined(); // env var not set yet + process.env.CEREBRAS_API_KEY = 'sk-live'; + const c = resolveOffload(); + expect(c.enabled).toBe(true); + expect(c.apiKey).toBe('sk-live'); + expect(c.keySource).toBe('CEREBRAS_API_KEY'); + expect(c.origin).toBe('config'); + }); + + it('clears the offload block on disable, leaving other config keys intact', () => { + const cfgPath = path.join(home, '.codegraph', 'config.json'); + fs.mkdirSync(path.dirname(cfgPath), { recursive: true }); + fs.writeFileSync(cfgPath, JSON.stringify({ somethingElse: 1, offload: { url: 'x' } })); + writeOffloadConfig(null); + const after = JSON.parse(fs.readFileSync(cfgPath, 'utf8')); + expect(after.offload).toBeUndefined(); + expect(after.somethingElse).toBe(1); + }); + }); + + describe('env overrides config', () => { + it('lets CODEGRAPH_OFFLOAD_URL override the file and report 
origin=env', () => { + writeOffloadConfig({ url: 'https://file.example/v1' }); + process.env.CODEGRAPH_OFFLOAD_URL = 'https://env.example/v1'; + const c = resolveOffload(); + expect(c.url).toBe('https://env.example/v1'); + expect(c.origin).toBe('env'); + }); + + it('reads the key directly from CODEGRAPH_OFFLOAD_KEY when set', () => { + process.env.CODEGRAPH_OFFLOAD_URL = 'https://env.example/v1'; + process.env.CODEGRAPH_OFFLOAD_KEY = 'sk-direct'; + const c = resolveOffload(); + expect(c.apiKey).toBe('sk-direct'); + expect(c.keySource).toBe('CODEGRAPH_OFFLOAD_KEY'); + }); + }); + + describe('strict degradation (never throws, returns null to fall back)', () => { + it('returns null when no endpoint is configured', async () => { + expect(await synthesizeOffload({ query: 'q', context: 'ctx' })).toBeNull(); + }); + + it('returns null when the upstream request rejects', async () => { + writeOffloadConfig({ url: 'https://api.cerebras.ai/v1' }); + vi.stubGlobal('fetch', vi.fn().mockRejectedValue(new Error('ECONNREFUSED'))); + expect(await synthesizeOffload({ query: 'q', context: 'ctx' })).toBeNull(); + }); + + it('returns null on a non-2xx response', async () => { + writeOffloadConfig({ url: 'https://api.cerebras.ai/v1' }); + vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ + ok: false, status: 500, text: async () => 'boom', + })); + expect(await synthesizeOffload({ query: 'q', context: 'ctx' })).toBeNull(); + }); + + it('returns null when the model returns an empty answer', async () => { + writeOffloadConfig({ url: 'https://api.cerebras.ai/v1' }); + vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ + ok: true, status: 200, json: async () => ({ choices: [{ message: { content: ' ' } }] }), + })); + expect(await synthesizeOffload({ query: 'q', context: 'ctx' })).toBeNull(); + }); + }); + + describe('success path', () => { + it('returns the synthesized answer (with the plain footer) and posts an OpenAI-compatible body with the key', async () => { + writeOffloadConfig({ url: 
'https://api.cerebras.ai/v1', model: 'gpt-oss-120b', keyEnv: 'CEREBRAS_API_KEY' }); + process.env.CEREBRAS_API_KEY = 'sk-live'; + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, status: 200, + json: async () => ({ choices: [{ message: { content: 'Coverage: full.\nThe answer.' }, finish_reason: 'stop' }] }), + }); + vi.stubGlobal('fetch', fetchMock); + + const out = await synthesizeOffload({ query: 'how does X work', context: 'source here' }); + expect(out).toContain('Coverage: full.'); + expect(out).toContain('Synthesized by CodeGraph'); // plain footer present + + const [calledUrl, init] = fetchMock.mock.calls[0]; + expect(calledUrl).toBe('https://api.cerebras.ai/v1/chat/completions'); + expect((init.headers as Record).authorization).toBe('Bearer sk-live'); + const body = JSON.parse(init.body as string); + expect(body.model).toBe('gpt-oss-120b'); + expect(body.messages[1].content).toContain('source here'); + expect(body.messages[1].content).toContain('how does X work'); + }); + }); + + describe('stripAgentDirectives', () => { + it('drops the agent-directed header but keeps source sections', () => { + const ctx = [ + '## Exploration: how does X work', + 'Found 12 symbols across 3 files.', + '', + '#### src/a.ts — foo(function)', + 'code body', + ].join('\n'); + const stripped = stripAgentDirectives(ctx); + expect(stripped).not.toContain('## Exploration:'); + expect(stripped).not.toContain('Found 12 symbols'); + expect(stripped).toContain('#### src/a.ts'); + expect(stripped).toContain('code body'); + }); + }); +}); diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index b0c2f4b48..e358a1ef3 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -36,6 +36,7 @@ import { installFatalHandlers } from './fatal-handler'; import { relaunchWithWasmRuntimeFlagsIfNeeded } from '../extraction/wasm-runtime-flags'; import { EXTRACTION_VERSION } from '../extraction/extraction-version'; import { getTelemetry, TELEMETRY_DOCS, recordIndexEvent } from 
'../telemetry'; +import { writeOffloadConfig, resolveOffload } from '../reasoning/config'; // Lazy-load heavy modules (CodeGraph, runInstaller) to keep CLI startup fast. async function loadCodeGraph(): Promise { @@ -1348,6 +1349,66 @@ program }); }); +/** + * codegraph offload — configure the reasoning offload (bring-your-own endpoint). + * + * When set, codegraph_explore reasons over its assembled source with a remote + * model and returns the synthesized answer instead of the raw source dump. + */ +const offloadCmd = program + .command('offload') + .description('Configure the reasoning offload — let codegraph_explore answer via your own reasoning model'); + +offloadCmd + .command('set-endpoint ') + .description('Send explore output to an OpenAI-compatible reasoning endpoint (URL ends in /v1)') + .option('--model ', 'Model id to request', 'gpt-oss-120b') + .option('--key-env ', 'Name of the env var holding the API key (the key is never written to disk)') + .option('--effort ', 'reasoning_effort: low | medium | high') + .option('--style