diff --git a/.prettierignore b/.prettierignore index af150df..1a10df6 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,5 @@ node_modules +.claude plugins/tracing/dist pnpm-lock.yaml plugins/tracing/test/fixtures diff --git a/README.md b/README.md index e28b250..7955171 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,7 @@ Run a Codex turn, then open your Langfuse project to see the trace. | `LANGFUSE_CODEX_METADATA` | No | — | JSON object of metadata to attach to all traces | | `LANGFUSE_CODEX_MAX_CHARS` | No | `20000` | Truncate inputs/outputs longer than this many characters | | `LANGFUSE_CODEX_DEBUG` | No | `false` | Set to `"true"` for verbose logging to stderr | +| `LANGFUSE_CODEX_FAIL_ON_ERROR` | No | `false` | Set to `"true"` to make hook upload errors fail the hook | ### Data regions @@ -107,24 +108,29 @@ Run a Codex turn, then open your Langfuse project to see the trace. ## JSON config reference -| Config key | Environment variable | Default | Description | -| ------------- | ------------------------------------------------------------- | ---------------------------- | --------------------------------- | -| `enabled` | `TRACE_TO_LANGFUSE` | `false` | Enable tracing | -| `public_key` | `LANGFUSE_PUBLIC_KEY` / `LANGFUSE_CODEX_PUBLIC_KEY` | — | Langfuse public key | -| `secret_key` | `LANGFUSE_SECRET_KEY` / `LANGFUSE_CODEX_SECRET_KEY` | — | Langfuse secret key | -| `base_url` | `LANGFUSE_BASE_URL` / `LANGFUSE_CODEX_BASE_URL` | `https://cloud.langfuse.com` | Langfuse host | -| `environment` | `LANGFUSE_TRACING_ENVIRONMENT` / `LANGFUSE_CODEX_ENVIRONMENT` | — | Environment label | -| `user_id` | `LANGFUSE_CODEX_USER_ID` | — | User id for all traces | -| `tags` | `LANGFUSE_CODEX_TAGS` | — | Tags for all traces | -| `metadata` | `LANGFUSE_CODEX_METADATA` | — | Metadata object for all traces | -| `max_chars` | `LANGFUSE_CODEX_MAX_CHARS` | `20000` | Input/output truncation threshold | -| `debug` | `LANGFUSE_CODEX_DEBUG` | `false` | Verbose logging | 
+| Config key | Environment variable | Default | Description | +| --------------- | ------------------------------------------------------------- | ---------------------------- | --------------------------------- | +| `enabled` | `TRACE_TO_LANGFUSE` | `false` | Enable tracing | +| `public_key` | `LANGFUSE_PUBLIC_KEY` / `LANGFUSE_CODEX_PUBLIC_KEY` | — | Langfuse public key | +| `secret_key` | `LANGFUSE_SECRET_KEY` / `LANGFUSE_CODEX_SECRET_KEY` | — | Langfuse secret key | +| `base_url` | `LANGFUSE_BASE_URL` / `LANGFUSE_CODEX_BASE_URL` | `https://cloud.langfuse.com` | Langfuse host | +| `environment` | `LANGFUSE_TRACING_ENVIRONMENT` / `LANGFUSE_CODEX_ENVIRONMENT` | — | Environment label | +| `user_id` | `LANGFUSE_CODEX_USER_ID` | — | User id for all traces | +| `tags` | `LANGFUSE_CODEX_TAGS` | — | Tags for all traces | +| `metadata` | `LANGFUSE_CODEX_METADATA` | — | Metadata object for all traces | +| `max_chars` | `LANGFUSE_CODEX_MAX_CHARS` | `20000` | Input/output truncation threshold | +| `debug` | `LANGFUSE_CODEX_DEBUG` | `false` | Verbose logging | +| `fail_on_error` | `LANGFUSE_CODEX_FAIL_ON_ERROR` | `false` | Fail the hook on upload errors | ## Troubleshooting - **No traces appear** — confirm `plugin_hooks = true`, the plugin is enabled in `config.toml`, and `TRACE_TO_LANGFUSE=true` is visible to the Codex process. Run with `LANGFUSE_CODEX_DEBUG=true` to log to stderr. - **Authentication fails** — check that the public/secret keys are valid and that `LANGFUSE_BASE_URL` matches the region the keys belong to. - **Traces land in the wrong project** — API keys are project-scoped in Langfuse; use the keys for the project you want. +- **Testing hook failures** — set `LANGFUSE_CODEX_FAIL_ON_ERROR=true` together with `LANGFUSE_CODEX_DEBUG=true` to make Codex report upload or flush errors instead of failing open. +- **Checking dedup sidecars** — successful uploads of completed turns are recorded next to the rollout as `.jsonl.langfuse`. 
If a Stop hook reads the rollout before Codex has written the turn-completed marker, the trace may upload without a sidecar entry; the next Stop hook will finalize and mark it. +- **Verifying in Langfuse** — use `npx langfuse-cli api traces list --from-timestamp <ISO-8601-timestamp> --limit 10 --order-by timestamp.desc --fields core,metrics,observations --json` with credentials for the same project. +- **Sandboxed/network-restricted runs** — Codex sandbox or network policy can prevent exports from reaching Langfuse. Debug logging and fail-on-error mode are the quickest way to distinguish hook execution from network failure. - **Self-hosting** — the TypeScript SDK requires Langfuse platform version >= 3.95.0. ## Data sent to Langfuse diff --git a/plugins/tracing/dist/index.mjs b/plugins/tracing/dist/index.mjs index 27e93ed..dc9a650 100644 --- a/plugins/tracing/dist/index.mjs +++ b/plugins/tracing/dist/index.mjs @@ -4275,14 +4275,16 @@ const ConfigSchema = object({ tags: array(string()).optional(), metadata: record(string(), string()).optional(), max_chars: number().int().positive(), - debug: boolean() + debug: boolean(), + fail_on_error: boolean() }); const PartialConfigSchema = ConfigSchema.partial(); const DEFAULTS = { enabled: false, base_url: "https://cloud.langfuse.com", max_chars: 2e4, - debug: false + debug: false, + fail_on_error: false }; function parseBoolean(value) { if (typeof value === "boolean") return value; @@ -4344,7 +4346,8 @@ async function readConfigFile(file) { tags: raw.tags != null ? parseTags(raw.tags) : void 0, metadata: raw.metadata != null ? parseMetadata(raw.metadata) : void 0, max_chars: raw.max_chars != null ? parseInteger(raw.max_chars) : void 0, - debug: raw.debug != null ? parseBoolean(raw.debug) : void 0 + debug: raw.debug != null ? parseBoolean(raw.debug) : void 0, + fail_on_error: raw.fail_on_error != null ? 
parseBoolean(raw.fail_on_error) : void 0 })); } catch { return; @@ -4364,7 +4367,8 @@ function readEnvConfig(env) { tags: parseTags(env.LANGFUSE_CODEX_TAGS), metadata: parseMetadata(env.LANGFUSE_CODEX_METADATA), max_chars: parseInteger(env.LANGFUSE_CODEX_MAX_CHARS), - debug: parseBoolean(env.LANGFUSE_CODEX_DEBUG) + debug: parseBoolean(env.LANGFUSE_CODEX_DEBUG), + fail_on_error: parseBoolean(env.LANGFUSE_CODEX_FAIL_ON_ERROR) })); } const getHomeDir = () => process.env.HOME ?? os$2.homedir(); @@ -46983,12 +46987,13 @@ async function convertRollout(rolloutFile, options) { if (turn.completed && turn.turnId) { uploaded.add(turn.turnId); await markTurnUploaded(rolloutFile, turn.turnId); - } + } else if (turn.turnId) debugLog(`uploaded in-progress turn ${turn.turnId}; waiting for completion before sidecar mark`); } } //#endregion //#region src/index.ts +let failOnError = process.env.LANGFUSE_CODEX_FAIL_ON_ERROR === "true"; /** * Entry point for the Codex `Stop` hook. * @@ -46997,7 +47002,9 @@ async function convertRollout(rolloutFile, options) { * transcript into Langfuse traces. * * The hook fails open: any error is logged (in debug mode) and swallowed so a -* tracing problem never blocks the Codex session. +* tracing problem never blocks the Codex session. Set +* `LANGFUSE_CODEX_FAIL_ON_ERROR=true` while testing if you want Codex to report +* hook failures instead. 
*/ async function runHook() { let hookInput; @@ -47008,6 +47015,7 @@ async function runHook() { } const config$1 = await getConfig(); setDebug(config$1.debug); + failOnError = config$1.fail_on_error; if (!config$1.enabled) { debugLog("tracing disabled (set TRACE_TO_LANGFUSE=true to enable)"); return; @@ -47025,16 +47033,19 @@ async function runHook() { await convertRollout(hookInput.transcript_path, { config: config$1 }); } catch (error) { debugLog("failed to convert rollout:", error); + if (config$1.fail_on_error) throw error; } finally { try { await instrumentation.shutdown(); } catch (error) { debugLog("error during flush/shutdown:", error); + if (config$1.fail_on_error) throw error; } } } runHook().catch((error) => { if (process.env.LANGFUSE_CODEX_DEBUG === "true") console.error("[langfuse-codex] fatal:", error); + if (failOnError) process.exitCode = 1; }); //#endregion diff --git a/plugins/tracing/hooks/hooks.json b/plugins/tracing/hooks/hooks.json index dd3d3e8..ca7598c 100644 --- a/plugins/tracing/hooks/hooks.json +++ b/plugins/tracing/hooks/hooks.json @@ -5,7 +5,7 @@ "hooks": [ { "type": "command", - "command": "node ./plugins/tracing/dist/index.mjs", + "command": "node \"${CODEX_HOME:-$HOME/.codex}/plugins/cache/codex-observability-plugin/tracing/0.1.0/dist/index.mjs\"", "timeout": 30, "statusMessage": "Uploading Codex trace to Langfuse" } diff --git a/plugins/tracing/src/config.ts b/plugins/tracing/src/config.ts index 57a2296..669398a 100644 --- a/plugins/tracing/src/config.ts +++ b/plugins/tracing/src/config.ts @@ -35,17 +35,20 @@ export const ConfigSchema = z.object({ max_chars: z.number().int().positive(), // LANGFUSE_CODEX_DEBUG debug: z.boolean(), + // LANGFUSE_CODEX_FAIL_ON_ERROR + fail_on_error: z.boolean(), }); export type Config = z.infer<typeof ConfigSchema>; const PartialConfigSchema = ConfigSchema.partial(); -const DEFAULTS: Pick<Config, "enabled" | "base_url" | "max_chars" | "debug"> = { +const DEFAULTS: Pick<Config, "enabled" | "base_url" | "max_chars" | "debug" | "fail_on_error"> = { enabled: false, base_url: "https://cloud.langfuse.com", max_chars: 20_000, debug: false, + fail_on_error: 
false, }; function parseBoolean(value: unknown): boolean | undefined { @@ -118,6 +121,7 @@ async function readConfigFile(file: string): Promise | undefined metadata: raw.metadata != null ? parseMetadata(raw.metadata) : undefined, max_chars: raw.max_chars != null ? parseInteger(raw.max_chars) : undefined, debug: raw.debug != null ? parseBoolean(raw.debug) : undefined, + fail_on_error: raw.fail_on_error != null ? parseBoolean(raw.fail_on_error) : undefined, }), ); } catch { @@ -142,6 +146,7 @@ function readEnvConfig(env: Record): Partial metadata: parseMetadata(env.LANGFUSE_CODEX_METADATA), max_chars: parseInteger(env.LANGFUSE_CODEX_MAX_CHARS), debug: parseBoolean(env.LANGFUSE_CODEX_DEBUG), + fail_on_error: parseBoolean(env.LANGFUSE_CODEX_FAIL_ON_ERROR), }), ); } diff --git a/plugins/tracing/src/index.ts b/plugins/tracing/src/index.ts index d2a132d..b32a583 100644 --- a/plugins/tracing/src/index.ts +++ b/plugins/tracing/src/index.ts @@ -4,6 +4,8 @@ import { convertRollout } from "./trace.js"; import type { HookInput } from "./types.js"; import { debugLog, readStdin, setDebug } from "./utils.js"; +let failOnError = process.env.LANGFUSE_CODEX_FAIL_ON_ERROR === "true"; + /** * Entry point for the Codex `Stop` hook. * @@ -12,7 +14,9 @@ import { debugLog, readStdin, setDebug } from "./utils.js"; * transcript into Langfuse traces. * * The hook fails open: any error is logged (in debug mode) and swallowed so a - * tracing problem never blocks the Codex session. + * tracing problem never blocks the Codex session. Set + * `LANGFUSE_CODEX_FAIL_ON_ERROR=true` while testing if you want Codex to report + * hook failures instead. 
*/ export async function runHook(): Promise<void> { let hookInput: HookInput; @@ -25,6 +29,7 @@ export async function runHook(): Promise<void> { const config = await getConfig(); setDebug(config.debug); + failOnError = config.fail_on_error; if (!config.enabled) { debugLog("tracing disabled (set TRACE_TO_LANGFUSE=true to enable)"); @@ -44,19 +49,24 @@ export async function runHook(): Promise<void> { await convertRollout(hookInput.transcript_path, { config }); } catch (error) { debugLog("failed to convert rollout:", error); + if (config.fail_on_error) throw error; } finally { try { await instrumentation.shutdown(); } catch (error) { debugLog("error during flush/shutdown:", error); + if (config.fail_on_error) throw error; } } } runHook().catch((error) => { - // Last-resort guard: never throw out of the hook. + // Last-resort guard: fail open unless explicitly requested for testing. if (process.env.LANGFUSE_CODEX_DEBUG === "true") { // eslint-disable-next-line no-console console.error("[langfuse-codex] fatal:", error); } + if (failOnError) { + process.exitCode = 1; + } }); diff --git a/plugins/tracing/src/trace.ts b/plugins/tracing/src/trace.ts index 39eb747..2c949ab 100644 --- a/plugins/tracing/src/trace.ts +++ b/plugins/tracing/src/trace.ts @@ -275,6 +275,10 @@ export async function convertRollout( if (turn.completed && turn.turnId) { uploaded.add(turn.turnId); await markTurnUploaded(rolloutFile, turn.turnId); + } else if (turn.turnId) { + debugLog( + `uploaded in-progress turn ${turn.turnId}; waiting for completion before sidecar mark`, + ); } } } diff --git a/plugins/tracing/test/config.test.ts b/plugins/tracing/test/config.test.ts index 3b2b41d..797c56b 100644 --- a/plugins/tracing/test/config.test.ts +++ b/plugins/tracing/test/config.test.ts @@ -33,6 +33,7 @@ describe("getConfig", () => { expect(config.enabled).toBe(false); expect(config.base_url).toBe("https://cloud.langfuse.com"); expect(config.max_chars).toBe(20_000); + expect(config.fail_on_error).toBe(false); }); it("reads 
credentials and enable flag from environment variables", async () => { @@ -118,4 +119,21 @@ describe("getConfig", () => { const config = await getConfig({ home, cwd: emptyHome(), env: {} }); expect(config.enabled).toBe(false); }); + + it("parses fail-on-error from config and environment", async () => { + const home = makeTmpHome({ + rel: ".codex/langfuse.json", + contents: { fail_on_error: "true" }, + }); + + const fromFile = await getConfig({ home, cwd: emptyHome(), env: {} }); + expect(fromFile.fail_on_error).toBe(true); + + const fromEnv = await getConfig({ + home, + cwd: emptyHome(), + env: { LANGFUSE_CODEX_FAIL_ON_ERROR: "false" }, + }); + expect(fromEnv.fail_on_error).toBe(false); + }); }); diff --git a/plugins/tracing/test/hook-command.test.ts b/plugins/tracing/test/hook-command.test.ts new file mode 100644 index 0000000..a3bd5ef --- /dev/null +++ b/plugins/tracing/test/hook-command.test.ts @@ -0,0 +1,116 @@ +import { spawn } from "node:child_process"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +import { afterEach, describe, expect, it } from "vitest"; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../.."); +const hookConfigFile = path.join(repoRoot, "plugins/tracing/hooks/hooks.json"); +const bundleFile = path.join(repoRoot, "plugins/tracing/dist/index.mjs"); +const pluginManifestFile = path.join(repoRoot, "plugins/tracing/.codex-plugin/plugin.json"); + +const tmpDirs: string[] = []; + +function makeTempDir(prefix: string): string { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), prefix)); + tmpDirs.push(dir); + return dir; +} + +function readHookCommand(): string { + const config = JSON.parse(fs.readFileSync(hookConfigFile, "utf-8")) as { + hooks: { Stop: Array<{ hooks: Array<{ command: string }> }> }; + }; + return config.hooks.Stop[0].hooks[0].command; +} + +function readPluginVersion(): string { + const manifest = 
JSON.parse(fs.readFileSync(pluginManifestFile, "utf-8")) as { version: string }; + return manifest.version; +} + +function stageInstalledPlugin(codexHome: string): void { + const installedBundle = path.join( + codexHome, + "plugins/cache/codex-observability-plugin/tracing/0.1.0/dist/index.mjs", + ); + fs.mkdirSync(path.dirname(installedBundle), { recursive: true }); + fs.copyFileSync(bundleFile, installedBundle); +} + +function runShellCommand( + command: string, + options: { cwd: string; env: NodeJS.ProcessEnv; input: string }, +): Promise<{ code: number | null; stderr: string; stdout: string }> { + return new Promise((resolve, reject) => { + const child = spawn(command, { + cwd: options.cwd, + env: options.env, + shell: true, + stdio: ["pipe", "pipe", "pipe"], + }); + let stdout = ""; + let stderr = ""; + const timeout = setTimeout(() => { + child.kill("SIGTERM"); + reject(new Error("hook command timed out")); + }, 10_000); + + child.stdout.setEncoding("utf-8"); + child.stderr.setEncoding("utf-8"); + child.stdout.on("data", (chunk) => (stdout += chunk)); + child.stderr.on("data", (chunk) => (stderr += chunk)); + child.once("error", (error) => { + clearTimeout(timeout); + reject(error); + }); + child.once("close", (code) => { + clearTimeout(timeout); + resolve({ code, stdout, stderr }); + }); + child.stdin.end(options.input); + }); +} + +afterEach(() => { + while (tmpDirs.length) { + fs.rmSync(tmpDirs.pop()!, { recursive: true, force: true }); + } +}); + +describe("bundled Stop hook command", () => { + it("runs from an arbitrary session cwd via CODEX_HOME instead of a relative repo path", async () => { + const codexHome = makeTempDir("lf-codex-home-"); + const sessionCwd = makeTempDir("lf-codex-cwd-"); + stageInstalledPlugin(codexHome); + + const { code, stderr, stdout } = await runShellCommand(readHookCommand(), { + cwd: sessionCwd, + env: { + ...process.env, + CODEX_HOME: codexHome, + HOME: codexHome, + }, + input: JSON.stringify({ + hook_event_name: "Stop", + 
transcript_path: path.join(sessionCwd, "rollout.jsonl"), + }), + }); + + expect(code).toBe(0); + expect(stdout).toBe(""); + expect(stderr).toBe(""); + }); + + it("does not depend on the old marketplace-root relative path", () => { + expect(readHookCommand()).not.toContain("./plugins/tracing/dist/index.mjs"); + }); + + it("points at the installed cache path for this plugin version", () => { + expect(readHookCommand()).toContain( + `/plugins/cache/codex-observability-plugin/tracing/${readPluginVersion()}/dist/index.mjs`, + ); + }); +}); diff --git a/plugins/tracing/test/trace.test.ts b/plugins/tracing/test/trace.test.ts index 948a201..ec429d1 100644 --- a/plugins/tracing/test/trace.test.ts +++ b/plugins/tracing/test/trace.test.ts @@ -24,6 +24,7 @@ const baseConfig: Config = { base_url: "https://cloud.langfuse.com", max_chars: 20_000, debug: false, + fail_on_error: false, }; const fixturesRoot = path.join(path.dirname(fileURLToPath(import.meta.url)), "fixtures/sessions");