From e3e5590507f064ef8ed0ae648c76c83cfa5c2eae Mon Sep 17 00:00:00 2001 From: Neil Daquioag <405533+ndycode@users.noreply.github.com> Date: Sun, 1 Mar 2026 08:28:58 +0800 Subject: [PATCH 1/3] feat: add local reliability operation KPIs Instrument operation lifecycle audit events across request/auth/tool flows, expose reliability KPIs in codex-metrics, and align docs/tests with retention-bounded local audit semantics. Co-authored-by: Codex --- README.md | 3 +- docs/privacy.md | 11 + index.ts | 671 ++++++++++++++++++++++++++++++++++++++++++++- lib/audit.ts | 93 ++++++- test/audit.test.ts | 60 ++++ test/index.test.ts | 202 ++++++++++++++ 6 files changed, 1034 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 13e46026..58e9c4b4 100644 --- a/README.md +++ b/README.md @@ -437,7 +437,7 @@ codex-status ### codex-metrics -Show live runtime metrics (request counts, latency, errors, retries, and safe mode). +Show live runtime metrics (request counts, latency, errors, retries, and safe mode), plus local-only reliability KPIs computed from local audit events in a best-effort 24h, retention-bounded window. ```text codex-metrics @@ -554,6 +554,7 @@ codex-dashboard | `codex-remove` | Remove account entry | `codex-remove index=3` | | `codex-export` | Export account backups | `codex-export` | | `codex-import` | Dry-run or apply imports | `codex-import path="~/backup/accounts.json" dryRun=true` | +| `codex-sync` | Manual bidirectional sync with Codex CLI auth | `codex-sync direction="pull"` | --- diff --git a/docs/privacy.md b/docs/privacy.md index cdbe5bb3..bd449344 100644 --- a/docs/privacy.md +++ b/docs/privacy.md @@ -37,6 +37,16 @@ All data is stored **locally on your machine**: - **Purpose:** Reduce GitHub API calls and improve performance - **TTL:** 15 minutes (automatically refreshes when stale) +### Audit Logs (Local Reliability Metrics) +- **Location:** `~/.opencode/logs/audit.log` (rotated as `audit.1.log`, `audit.2.log`, etc.) +- **Contents:** Local operation lifecycle events used by `codex-metrics` for best-effort 24h reliability KPIs (bounded by local log retention) +- **Includes:** + - Operation class/name (`request`, `auth`, `tool`, `sync`, `startup`, `ui_event`) + - Timing and outcome (`start`, `success`, `failure`, `retry`, `recovery`) + - Error category and HTTP status (when available) +- **Does not include:** prompt text, model responses, OAuth tokens, or secrets +- **Scope:** Local-only; never transmitted to third parties + ### Debug Logs - **Location:** `~/.opencode/logs/codex-plugin/` - **Contents:** Request/response metadata logs (only when `ENABLE_PLUGIN_REQUEST_LOGGING=1` is set) @@ -114,6 +124,7 @@ rm -rf ~/.opencode/cache/ ### Delete Logs ```bash rm -rf ~/.opencode/logs/codex-plugin/ +rm -f ~/.opencode/logs/audit*.log ``` ### Revoke OAuth Access diff --git a/index.ts b/index.ts index 4f7a4a59..bd7ba9bb 100644 --- a/index.ts +++ b/index.ts @@ -26,6 +26,7 @@ import { tool } from "@opencode-ai/plugin/tool"; import type { Plugin, PluginInput } from "@opencode-ai/plugin"; import type { Auth } from "@opencode-ai/sdk"; +import { randomUUID } from "node:crypto"; import { createAuthorizationFlow, exchangeAuthorizationCode, @@ -169,6 +170,15 @@ import { type ModelFamily, } from "./lib/prompts/codex.js"; import { prewarmOpenCodeCodexPrompt } from "./lib/prompts/opencode-codex.js"; +import { + auditLog, + AuditAction, + AuditOutcome, + readAuditEntries, + OPERATION_EVENT_VERSION, + type OperationClass, + type ReliabilityAuditMetadata, +} from "./lib/audit.js"; import type { AccountIdSource, OAuthAuthDetails, @@ -280,6 +290,404 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { lastSelectionSnapshot: null, }; + const processSessionId = randomUUID(); + let operationSequence = 0; + + type OperationTracker = { + operationId: string; + operationClass: OperationClass; + operationName: string; + startedAt: number; + attemptNo: number; + retryCount: number; + manualRecoveryRequired: boolean; + modelFamily?: string; + retryProfile?: string; + extraMetadata?: Record; + }; + + type OperationStartOptions = { + operationClass: OperationClass; + operationName: string; + attemptNo?: number; + retryCount?: number; + manualRecoveryRequired?: boolean; + modelFamily?: string; + retryProfile?: string; + extraMetadata?: Record; + }; + + type OperationStatusOptions = { + errorCategory?: string; + httpStatus?: number; + manualRecoveryRequired?: boolean; + extraMetadata?: Record; + }; + + type ReliabilityKpiSnapshot = { + requestStarts24h: number; + uninterruptedCompletionRate24h: number | null; + firstAttemptSuccessRate24h: number | null; + autoRecoverySuccessRate24h: number | null; + tokenRefreshSuccessRate24h: number | null; + operationSuccessRateByClass24h: Record; + }; + + const formatPercent = (value: number | null): string => + value === null ? "n/a" : `${value.toFixed(1)}%`; + + const toPercent = (numerator: number, denominator: number): number | null => + denominator > 0 ? (numerator / denominator) * 100 : null; + + const createOperationId = (operationClass: OperationClass): string => { + operationSequence += 1; + return `${operationClass}-${Date.now()}-${operationSequence}-${randomUUID().slice(0, 8)}`; + }; + + const buildOperationMetadata = ( + state: OperationTracker, + overrides: Partial = {}, + ): ReliabilityAuditMetadata => ({ + event_version: OPERATION_EVENT_VERSION, + operation_id: state.operationId, + process_session_id: processSessionId, + operation_class: state.operationClass, + operation_name: state.operationName, + attempt_no: state.attemptNo, + retry_count: state.retryCount, + manual_recovery_required: state.manualRecoveryRequired, + beginner_safe_mode: beginnerSafeModeEnabled, + ...(state.modelFamily ? { model_family: state.modelFamily } : {}), + ...(state.retryProfile ? { retry_profile: state.retryProfile } : {}), + ...(state.extraMetadata ?? {}), + ...overrides, + }); + + const startOperation = ({ + operationClass, + operationName, + attemptNo = 1, + retryCount = 0, + manualRecoveryRequired = false, + modelFamily, + retryProfile: operationRetryProfile, + extraMetadata, + }: OperationStartOptions): OperationTracker => { + const state: OperationTracker = { + operationId: createOperationId(operationClass), + operationClass, + operationName, + startedAt: Date.now(), + attemptNo, + retryCount, + manualRecoveryRequired, + modelFamily, + retryProfile: operationRetryProfile, + extraMetadata, + }; + auditLog( + AuditAction.OPERATION_START, + "plugin", + operationName, + AuditOutcome.PARTIAL, + buildOperationMetadata(state), + ); + return state; + }; + + const markOperationRetry = (state: OperationTracker, options: OperationStatusOptions = {}): void => { + state.retryCount += 1; + auditLog( + AuditAction.OPERATION_RETRY, + "plugin", + state.operationName, + AuditOutcome.PARTIAL, + buildOperationMetadata(state, { + ...(options.errorCategory ? { error_category: options.errorCategory } : {}), + ...(typeof options.httpStatus === "number" ? { http_status: options.httpStatus } : {}), + ...(options.extraMetadata ?? {}), + }), + ); + }; + + const markOperationRecovery = ( + state: OperationTracker, + options: OperationStatusOptions & { recoveryStep: string }, + ): void => { + auditLog( + AuditAction.OPERATION_RECOVERY, + "plugin", + state.operationName, + AuditOutcome.PARTIAL, + buildOperationMetadata(state, { + ...(options.errorCategory ? { error_category: options.errorCategory } : {}), + ...(typeof options.httpStatus === "number" ? { http_status: options.httpStatus } : {}), + recovery_step: options.recoveryStep, + ...(options.extraMetadata ?? {}), + }), + ); + }; + + const completeOperationSuccess = ( + state: OperationTracker, + options: OperationStatusOptions = {}, + ): void => { + if (options.manualRecoveryRequired === true) { + state.manualRecoveryRequired = true; + } + auditLog( + AuditAction.OPERATION_SUCCESS, + "plugin", + state.operationName, + AuditOutcome.SUCCESS, + buildOperationMetadata(state, { + duration_ms: Math.max(0, Date.now() - state.startedAt), + ...(options.errorCategory ? { error_category: options.errorCategory } : {}), + ...(typeof options.httpStatus === "number" ? { http_status: options.httpStatus } : {}), + ...(options.extraMetadata ?? {}), + }), + ); + }; + + const completeOperationFailure = ( + state: OperationTracker, + options: OperationStatusOptions = {}, + ): void => { + if (options.manualRecoveryRequired === true) { + state.manualRecoveryRequired = true; + } + auditLog( + AuditAction.OPERATION_FAILURE, + "plugin", + state.operationName, + AuditOutcome.FAILURE, + buildOperationMetadata(state, { + duration_ms: Math.max(0, Date.now() - state.startedAt), + error_category: options.errorCategory ?? "unknown", + ...(typeof options.httpStatus === "number" ? { http_status: options.httpStatus } : {}), + ...(options.extraMetadata ?? {}), + }), + ); + }; + + const computeReliabilityKpis = (nowMs: number): ReliabilityKpiSnapshot => { + const sinceMs = nowMs - 24 * 60 * 60 * 1000; + const entries = readAuditEntries({ sinceMs }); + const operationEntries = entries.filter((entry) => + entry.action === AuditAction.OPERATION_START || + entry.action === AuditAction.OPERATION_SUCCESS || + entry.action === AuditAction.OPERATION_FAILURE || + entry.action === AuditAction.OPERATION_RETRY || + entry.action === AuditAction.OPERATION_RECOVERY, + ); + + const requestStarts = operationEntries.filter((entry) => { + const metadata = entry.metadata as ReliabilityAuditMetadata | undefined; + return ( + entry.action === AuditAction.OPERATION_START && + metadata?.operation_class === "request" && + metadata.operation_name === "request.fetch" + ); + }); + + const requestSuccesses = operationEntries.filter((entry) => { + const metadata = entry.metadata as ReliabilityAuditMetadata | undefined; + return ( + entry.action === AuditAction.OPERATION_SUCCESS && + metadata?.operation_class === "request" && + metadata.operation_name === "request.fetch" + ); + }); + + const uninterruptedSuccesses = requestSuccesses.filter((entry) => { + const metadata = entry.metadata as ReliabilityAuditMetadata | undefined; + return metadata?.manual_recovery_required === false; + }); + + const firstAttemptStarts = requestStarts.filter((entry) => { + const metadata = entry.metadata as ReliabilityAuditMetadata | undefined; + return metadata?.attempt_no === 1; + }); + const firstAttemptSuccesses = requestSuccesses.filter((entry) => { + const metadata = entry.metadata as ReliabilityAuditMetadata | undefined; + return metadata?.attempt_no === 1 && metadata.retry_count === 0; + }); + + type RequestFlowState = { + firstAttemptFailed: boolean; + eventualSuccess: boolean; + manualRecoveryRequired: boolean; + }; + const requestFlowStates = new Map(); + for (const entry of operationEntries) { + const metadata = entry.metadata as (ReliabilityAuditMetadata & { request_flow_id?: string }) | undefined; + if ( + metadata?.operation_class !== "request" || + metadata.operation_name !== "request.fetch" || + !metadata.request_flow_id + ) { + continue; + } + + const flow = requestFlowStates.get(metadata.request_flow_id) ?? { + firstAttemptFailed: false, + eventualSuccess: false, + manualRecoveryRequired: false, + }; + if (entry.action === AuditAction.OPERATION_FAILURE && metadata.attempt_no === 1) { + flow.firstAttemptFailed = true; + } + if (entry.action === AuditAction.OPERATION_SUCCESS) { + flow.eventualSuccess = true; + } + if (metadata.manual_recovery_required) { + flow.manualRecoveryRequired = true; + } + requestFlowStates.set(metadata.request_flow_id, flow); + } + const flowsWithFirstFailure = [...requestFlowStates.values()].filter((flow) => flow.firstAttemptFailed); + const autoRecoveredFlows = flowsWithFirstFailure.filter( + (flow) => flow.eventualSuccess && !flow.manualRecoveryRequired, + ); + + const authRefreshStarts = operationEntries.filter((entry) => { + const metadata = entry.metadata as ReliabilityAuditMetadata | undefined; + return ( + entry.action === AuditAction.OPERATION_START && + metadata?.operation_class === "auth" && + metadata.operation_name === "auth.refresh-token" + ); + }); + const authRefreshSuccesses = operationEntries.filter((entry) => { + const metadata = entry.metadata as ReliabilityAuditMetadata | undefined; + return ( + entry.action === AuditAction.OPERATION_SUCCESS && + metadata?.operation_class === "auth" && + metadata.operation_name === "auth.refresh-token" + ); + }); + + const startsByClass = new Map(); + const successesByClass = new Map(); + for (const entry of operationEntries) { + const metadata = entry.metadata as ReliabilityAuditMetadata | undefined; + if (!metadata) continue; + if ( + metadata.operation_class === "request" && + metadata.operation_name === "request.exhausted" + ) { + continue; + } + if (entry.action === AuditAction.OPERATION_START) { + startsByClass.set( + metadata.operation_class, + (startsByClass.get(metadata.operation_class) ?? 0) + 1, + ); + } + if (entry.action === AuditAction.OPERATION_SUCCESS) { + successesByClass.set( + metadata.operation_class, + (successesByClass.get(metadata.operation_class) ?? 0) + 1, + ); + } + } + const operationSuccessRateByClass24h: Record = {}; + for (const [operationClass, startCount] of startsByClass.entries()) { + const successCount = successesByClass.get(operationClass) ?? 0; + operationSuccessRateByClass24h[operationClass] = toPercent(successCount, startCount); + } + + return { + requestStarts24h: requestStarts.length, + uninterruptedCompletionRate24h: toPercent( + uninterruptedSuccesses.length, + requestStarts.length, + ), + firstAttemptSuccessRate24h: toPercent( + firstAttemptSuccesses.length, + firstAttemptStarts.length, + ), + autoRecoverySuccessRate24h: toPercent( + autoRecoveredFlows.length, + flowsWithFirstFailure.length, + ), + tokenRefreshSuccessRate24h: toPercent( + authRefreshSuccesses.length, + authRefreshStarts.length, + ), + operationSuccessRateByClass24h, + }; + }; + + const instrumentToolRegistry = >(tools: TTools): TTools => { + for (const [toolName, toolDefinition] of Object.entries(tools)) { + const candidate = toolDefinition as { + execute?: (input: unknown) => Promise | unknown; + }; + if (typeof candidate.execute !== "function") continue; + const originalExecute = candidate.execute.bind(candidate); + candidate.execute = async (input: unknown) => { + const dryRunValue = + typeof input === "object" && + input !== null && + "dryRun" in input && + typeof (input as { dryRun?: unknown }).dryRun === "boolean" + ? (input as { dryRun: boolean }).dryRun + : undefined; + const op = startOperation({ + operationClass: "tool", + operationName: `tool.${toolName}`, + retryProfile: runtimeMetrics.retryProfile, + extraMetadata: + typeof dryRunValue === "boolean" + ? { operation_mode: dryRunValue ? "dry_run" : "apply" } + : undefined, + }); + try { + const result = await originalExecute(input); + completeOperationSuccess(op); + return result; + } catch (error) { + completeOperationFailure(op, { + errorCategory: "tool-execution", + }); + throw error; + } + }; + } + return tools; + }; + + const instrumentAuthMethods = (methods: TMethods): TMethods => { + for (const methodDefinition of methods) { + const candidate = methodDefinition as { + label?: unknown; + authorize?: (input?: Record) => Promise; + }; + if (typeof candidate.authorize !== "function") continue; + const originalAuthorize = candidate.authorize.bind(candidate); + candidate.authorize = async (input?: Record) => { + const label = typeof candidate.label === "string" ? candidate.label : "oauth"; + const authOperation = startOperation({ + operationClass: "auth", + operationName: `auth.method.${label.toLowerCase().replace(/\s+/g, "-")}`, + retryProfile: runtimeMetrics.retryProfile, + }); + try { + const result = await originalAuthorize(input); + completeOperationSuccess(authOperation); + return result; + } catch (error) { + completeOperationFailure(authOperation, { + errorCategory: "auth-method", + }); + throw error; + } + }; + } + return methods; + }; + type TokenSuccess = Extract; type TokenSuccessWithAccount = TokenSuccess & { accountIdOverride?: string; @@ -1706,6 +2114,11 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { const runStartupPreflight = async (): Promise => { if (startupPreflightShown) return; startupPreflightShown = true; + const startupOperation = startOperation({ + operationClass: "startup", + operationName: "startup.preflight", + retryProfile: runtimeMetrics.retryProfile, + }); try { const state = await buildSetupChecklistState(); const message = @@ -1714,7 +2127,18 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { `Next: ${state.nextAction}`; await showToast(message, state.summary.healthy > 0 ? "info" : "warning"); logInfo(message); + completeOperationSuccess(startupOperation, { + extraMetadata: { + healthy_accounts: state.summary.healthy, + total_accounts: state.summary.total, + blocked_accounts: state.summary.blocked, + rate_limited_accounts: state.summary.rateLimited, + }, + }); } catch (error) { + completeOperationFailure(startupOperation, { + errorCategory: "startup-preflight", + }); logDebug( `[${PLUGIN_NAME}] Startup preflight skipped: ${ error instanceof Error ? error.message : String(error) @@ -1730,6 +2154,13 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { // Event handler for session recovery and account selection const eventHandler = async (input: { event: { type: string; properties?: unknown } }) => { + const eventType = input.event.type ?? "unknown"; + const eventOperation = startOperation({ + operationClass: "ui_event", + operationName: `ui-event.${eventType}`, + retryProfile: runtimeMetrics.retryProfile, + extraMetadata: { event_type: eventType }, + }); try { const { event } = input; // Handle TUI account selection events @@ -1741,6 +2172,9 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { const props = event.properties as { index?: number; accountIndex?: number; provider?: string }; // Filter by provider if specified if (props.provider && props.provider !== "openai" && props.provider !== PROVIDER_ID) { + completeOperationSuccess(eventOperation, { + extraMetadata: { ignored_reason: "provider-mismatch" }, + }); return; } @@ -1748,6 +2182,9 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { if (typeof index === "number") { const storage = await loadAccounts(); if (!storage || index < 0 || index >= storage.accounts.length) { + completeOperationSuccess(eventOperation, { + extraMetadata: { ignored_reason: "invalid-account-index" }, + }); return; } @@ -1776,7 +2213,11 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { await showToast(`Switched to account ${index + 1}`, "info"); } } + completeOperationSuccess(eventOperation); } catch (error) { + completeOperationFailure(eventOperation, { + errorCategory: "ui-event", + }); logDebug(`[${PLUGIN_NAME}] Event handler error: ${error instanceof Error ? error.message : String(error)}`); } }; @@ -2173,6 +2614,8 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { let allRateLimitedRetries = 0; let emptyResponseRetries = 0; + const requestFlowId = randomUUID(); + let requestAttemptNumber = 0; const attemptedUnsupportedFallbackModels = new Set(); if (model) { attemptedUnsupportedFallbackModels.add(model); @@ -2216,8 +2659,20 @@ while (attempted.size < Math.max(1, accountCount)) { ); let accountAuth = accountManager.toAuthDetails(account) as OAuthAuthDetails; + let refreshOperation: OperationTracker | null = null; try { if (shouldRefreshToken(accountAuth, tokenRefreshSkewMs)) { + refreshOperation = startOperation({ + operationClass: "auth", + operationName: "auth.refresh-token", + attemptNo: 1, + retryCount: 0, + modelFamily, + retryProfile, + extraMetadata: { + request_flow_id: requestFlowId, + }, + }); accountAuth = (await refreshAndUpdateToken( accountAuth, client, @@ -2225,8 +2680,14 @@ while (attempted.size < Math.max(1, accountCount)) { accountManager.updateFromAuth(account, accountAuth); accountManager.clearAuthFailures(account); accountManager.saveToDiskDebounced(); + completeOperationSuccess(refreshOperation); } } catch (err) { + if (refreshOperation) { + completeOperationFailure(refreshOperation, { + errorCategory: "auth-refresh", + }); + } logDebug(`[${PLUGIN_NAME}] Auth refresh failed for account: ${(err as Error)?.message ?? String(err)}`); if ( !consumeRetryBudget( @@ -2340,6 +2801,19 @@ while (attempted.size < Math.max(1, accountCount)) { } while (true) { + requestAttemptNumber++; + const requestOperation = startOperation({ + operationClass: "request", + operationName: "request.fetch", + attemptNo: requestAttemptNumber, + retryCount: Math.max(0, requestAttemptNumber - 1), + modelFamily, + retryProfile, + extraMetadata: { + request_flow_id: requestFlowId, + quota_key: quotaKey, + }, + }); let response: Response; const fetchStart = performance.now(); @@ -2378,6 +2852,11 @@ while (attempted.size < Math.max(1, accountCount)) { `Network error on account ${account.index + 1}: ${errorMsg}`, ) ) { + completeOperationFailure(requestOperation, { + errorCategory: "network", + manualRecoveryRequired: true, + extraMetadata: { request_flow_id: requestFlowId }, + }); accountManager.refundToken(account, modelFamily, model); return new Response( JSON.stringify({ @@ -2401,6 +2880,15 @@ while (attempted.size < Math.max(1, accountCount)) { runtimeMetrics.lastErrorCategory = "network"; accountManager.refundToken(account, modelFamily, model); accountManager.recordFailure(account, modelFamily, model); + completeOperationFailure(requestOperation, { + errorCategory: "network", + extraMetadata: { request_flow_id: requestFlowId }, + }); + markOperationRecovery(requestOperation, { + errorCategory: "network", + recoveryStep: "account-rotation", + extraMetadata: { request_flow_id: requestFlowId }, + }); break; } finally { clearTimeout(fetchTimeoutId); @@ -2421,6 +2909,12 @@ while (attempted.size < Math.max(1, accountCount)) { if (!response.ok) { const contextOverflowResult = await handleContextOverflow(response, model); if (contextOverflowResult.handled) { + completeOperationSuccess(requestOperation, { + extraMetadata: { + request_flow_id: requestFlowId, + context_overflow_recovered: true, + }, + }); return contextOverflowResult.response; } @@ -2443,6 +2937,22 @@ while (attempted.size < Math.max(1, accountCount)) { account.lastSwitchReason = "rotation"; runtimeMetrics.lastError = `Unsupported model on account ${account.index + 1}: ${blockedModel}`; runtimeMetrics.lastErrorCategory = "unsupported-model"; + completeOperationFailure(requestOperation, { + errorCategory: "unsupported-model", + httpStatus: response.status, + extraMetadata: { + request_flow_id: requestFlowId, + blocked_model: blockedModel, + }, + }); + markOperationRecovery(requestOperation, { + errorCategory: "unsupported-model", + httpStatus: response.status, + recoveryStep: "account-rotation", + extraMetadata: { + request_flow_id: requestFlowId, + }, + }); logWarn( `Model ${blockedModel} is unsupported for account ${account.index + 1}. Trying next account/workspace before fallback.`, { @@ -2497,6 +3007,23 @@ while (attempted.size < Math.max(1, accountCount)) { }; runtimeMetrics.lastError = `Model fallback: ${previousModel} -> ${model}`; runtimeMetrics.lastErrorCategory = "model-fallback"; + completeOperationFailure(requestOperation, { + errorCategory: "unsupported-model", + httpStatus: response.status, + extraMetadata: { + request_flow_id: requestFlowId, + blocked_model: previousModel, + }, + }); + markOperationRecovery(requestOperation, { + errorCategory: "model-fallback", + httpStatus: response.status, + recoveryStep: "model-fallback", + extraMetadata: { + request_flow_id: requestFlowId, + fallback_model: model, + }, + }); logWarn( `Model ${previousModel} is unsupported for this ChatGPT account. Falling back to ${model}.`, { @@ -2565,8 +3092,25 @@ while (attempted.size < Math.max(1, accountCount)) { `Server error ${response.status} on account ${account.index + 1}`, ) ) { + completeOperationFailure(requestOperation, { + errorCategory: "server", + httpStatus: response.status, + manualRecoveryRequired: true, + extraMetadata: { request_flow_id: requestFlowId }, + }); return errorResponse; } + completeOperationFailure(requestOperation, { + errorCategory: "server", + httpStatus: response.status, + extraMetadata: { request_flow_id: requestFlowId }, + }); + markOperationRecovery(requestOperation, { + errorCategory: "server", + httpStatus: response.status, + recoveryStep: "account-rotation", + extraMetadata: { request_flow_id: requestFlowId }, + }); break; } @@ -2601,6 +3145,19 @@ while (attempted.size < Math.max(1, accountCount)) { } await sleep(addJitter(Math.max(MIN_BACKOFF_MS, delayMs), 0.2)); + markOperationRetry(requestOperation, { + errorCategory: "rate-limit-short", + httpStatus: response.status, + extraMetadata: { + request_flow_id: requestFlowId, + backoff_ms: delayMs, + }, + }); + completeOperationFailure(requestOperation, { + errorCategory: "rate-limit-short", + httpStatus: response.status, + extraMetadata: { request_flow_id: requestFlowId }, + }); continue; } @@ -2616,6 +3173,17 @@ while (attempted.size < Math.max(1, accountCount)) { runtimeMetrics.accountRotations++; runtimeMetrics.lastErrorCategory = "rate-limit"; accountManager.saveToDiskDebounced(); + completeOperationFailure(requestOperation, { + errorCategory: "rate-limit", + httpStatus: response.status, + extraMetadata: { request_flow_id: requestFlowId, backoff_ms: delayMs }, + }); + markOperationRecovery(requestOperation, { + errorCategory: "rate-limit", + httpStatus: response.status, + recoveryStep: "account-rotation", + extraMetadata: { request_flow_id: requestFlowId, backoff_ms: delayMs }, + }); logWarn( `Rate limited. Rotating account ${account.index + 1} (${account.email ?? "unknown"}).`, ); @@ -2639,6 +3207,12 @@ while (attempted.size < Math.max(1, accountCount)) { runtimeMetrics.failedRequests++; runtimeMetrics.lastError = `HTTP ${response.status}`; runtimeMetrics.lastErrorCategory = "http"; + completeOperationFailure(requestOperation, { + errorCategory: "http", + httpStatus: response.status, + manualRecoveryRequired: true, + extraMetadata: { request_flow_id: requestFlowId }, + }); return errorResponse; } @@ -2652,6 +3226,12 @@ while (attempted.size < Math.max(1, accountCount)) { runtimeMetrics.failedRequests++; runtimeMetrics.lastError = `HTTP ${successResponse.status}`; runtimeMetrics.lastErrorCategory = "http"; + completeOperationFailure(requestOperation, { + errorCategory: "http", + httpStatus: successResponse.status, + manualRecoveryRequired: true, + extraMetadata: { request_flow_id: requestFlowId }, + }); return successResponse; } @@ -2678,6 +3258,19 @@ while (attempted.size < Math.max(1, accountCount)) { ); accountManager.refundToken(account, modelFamily, model); accountManager.recordFailure(account, modelFamily, model); + markOperationRetry(requestOperation, { + errorCategory: "empty-response", + extraMetadata: { + request_flow_id: requestFlowId, + retry_attempt: emptyResponseRetries, + }, + }); + completeOperationFailure(requestOperation, { + errorCategory: "empty-response", + extraMetadata: { + request_flow_id: requestFlowId, + }, + }); await sleep(addJitter(emptyResponseRetryDelayMs, 0.2)); break; } @@ -2692,6 +3285,10 @@ while (attempted.size < Math.max(1, accountCount)) { runtimeMetrics.successfulRequests++; runtimeMetrics.lastError = null; runtimeMetrics.lastErrorCategory = null; + completeOperationSuccess(requestOperation, { + httpStatus: successResponse.status, + extraMetadata: { request_flow_id: requestFlowId }, + }); return successResponse; } if (restartAccountTraversalWithFallback) { @@ -2734,6 +3331,24 @@ while (attempted.size < Math.max(1, accountCount)) { runtimeMetrics.failedRequests++; runtimeMetrics.lastError = message; runtimeMetrics.lastErrorCategory = waitMs > 0 ? "rate-limit" : "account-failure"; + const exhaustedOperation = startOperation({ + operationClass: "request", + operationName: "request.exhausted", + attemptNo: requestAttemptNumber + 1, + retryCount: requestAttemptNumber, + modelFamily, + retryProfile, + extraMetadata: { + request_flow_id: requestFlowId, + }, + }); + completeOperationFailure(exhaustedOperation, { + errorCategory: waitMs > 0 ? "rate-limit" : "account-failure", + manualRecoveryRequired: true, + extraMetadata: { + request_flow_id: requestFlowId, + }, + }); return new Response(JSON.stringify({ error: { message } }), { status: waitMs > 0 ? 429 : 503, headers: { @@ -2751,7 +3366,7 @@ while (attempted.size < Math.max(1, accountCount)) { loaderMutex = null; } }, - methods: [ + methods: instrumentAuthMethods([ { label: AUTH_LABELS.OAUTH, type: "oauth" as const, @@ -3751,9 +4366,9 @@ while (attempted.size < Math.max(1, accountCount)) { }); }, }, - ], + ]), }, - tool: { + tool: instrumentToolRegistry({ "codex-list": tool({ description: "List all Codex OAuth accounts and the current active index.", @@ -4238,6 +4853,13 @@ while (attempted.size < Math.max(1, accountCount)) { const total = runtimeMetrics.totalRequests; const successful = runtimeMetrics.successfulRequests; const refreshMetrics = getRefreshQueueMetrics(); + const reliabilityKpis = computeReliabilityKpis(now); + const operationClassRates = Object.entries( + reliabilityKpis.operationSuccessRateByClass24h, + ) + .sort(([classA], [classB]) => classA.localeCompare(classB)) + .map(([operationClass, value]) => `${operationClass}=${formatPercent(value)}`) + .join(", "); const successRate = total > 0 ? ((successful / total) * 100).toFixed(1) : "0.0"; const avgLatencyMs = successful > 0 @@ -4279,6 +4901,14 @@ while (attempted.size < Math.max(1, accountCount)) { `${refreshMetrics.failed}/` + `${refreshMetrics.pending}`, `Last upstream request: ${lastRequest}`, + "", + "Local reliability KPIs (best-effort 24h, retention-bounded):", + `Request starts: ${reliabilityKpis.requestStarts24h}`, + `Uninterrupted completion rate: ${formatPercent(reliabilityKpis.uninterruptedCompletionRate24h)}`, + `First-attempt success rate: ${formatPercent(reliabilityKpis.firstAttemptSuccessRate24h)}`, + `Auto-recovery success rate: ${formatPercent(reliabilityKpis.autoRecoverySuccessRate24h)}`, + `Token refresh success rate: ${formatPercent(reliabilityKpis.tokenRefreshSuccessRate24h)}`, + `Operation success by class: ${operationClassRates || "n/a"}`, ]; if (runtimeMetrics.lastError) { @@ -4339,6 +4969,39 @@ while (attempted.size < Math.max(1, accountCount)) { "muted", ), formatUiKeyValue(ui, "Last upstream request", lastRequest, "muted"), + "", + ...formatUiSection(ui, "Local reliability KPIs (best-effort 24h, retention-bounded)"), + formatUiKeyValue(ui, "Request starts", String(reliabilityKpis.requestStarts24h)), + formatUiKeyValue( + ui, + "Uninterrupted completion", + formatPercent(reliabilityKpis.uninterruptedCompletionRate24h), + "accent", + ), + formatUiKeyValue( + ui, + "First-attempt success", + formatPercent(reliabilityKpis.firstAttemptSuccessRate24h), + "accent", + ), + formatUiKeyValue( + ui, + "Auto-recovery success", + formatPercent(reliabilityKpis.autoRecoverySuccessRate24h), + "accent", + ), + formatUiKeyValue( + ui, + "Token refresh success", + formatPercent(reliabilityKpis.tokenRefreshSuccessRate24h), + "accent", + ), + formatUiKeyValue( + ui, + "Operation success by class", + operationClassRates || "n/a", + "muted", + ), ]; if (runtimeMetrics.lastError) { styled.push(formatUiKeyValue(ui, "Last error", runtimeMetrics.lastError, "danger")); @@ -5620,7 +6283,7 @@ while (attempted.size < Math.max(1, accountCount)) { }, }), - }, + }), }; }; diff --git a/lib/audit.ts b/lib/audit.ts index 3976b11b..39216013 100644 --- a/lib/audit.ts +++ b/lib/audit.ts @@ -1,4 +1,13 @@ -import { mkdirSync, existsSync, statSync, renameSync, readdirSync, unlinkSync, appendFileSync } from "node:fs"; +import { + appendFileSync, + existsSync, + mkdirSync, + readFileSync, + readdirSync, + renameSync, + statSync, + unlinkSync, +} from "node:fs"; // Simple in-memory queue to prevent EBUSY locks during highly concurrent writes const logQueue: string[] = []; @@ -33,6 +42,8 @@ export enum AuditAction { ACCOUNT_REFRESH = "account.refresh", ACCOUNT_EXPORT = "account.export", ACCOUNT_IMPORT = "account.import", + ACCOUNT_SYNC_PULL = "account.sync.pull", + ACCOUNT_SYNC_PUSH = "account.sync.push", AUTH_LOGIN = "auth.login", AUTH_LOGOUT = "auth.logout", AUTH_REFRESH = "auth.refresh", @@ -44,6 +55,11 @@ export enum AuditAction { REQUEST_FAILURE = "request.failure", CIRCUIT_OPEN = "circuit.open", CIRCUIT_CLOSE = "circuit.close", + OPERATION_START = "operation.start", + OPERATION_SUCCESS = "operation.success", + OPERATION_FAILURE = "operation.failure", + OPERATION_RETRY = "operation.retry", + OPERATION_RECOVERY = "operation.recovery", } export enum AuditOutcome { @@ -69,6 +85,44 @@ export interface AuditConfig { maxFiles: number; } +export type OperationClass = + | "request" + | "auth" + | "account" + | "tool" + | "sync" + | "startup" + | "ui_event" + | "storage"; + +export type OperationOutcome = "success" | "failure" | "partial"; + +export interface ReliabilityAuditMetadata { + event_version: "1.0"; + operation_id: string; + process_session_id: string; + operation_class: OperationClass; + operation_name: string; + attempt_no: number; + retry_count: number; + manual_recovery_required: boolean; + beginner_safe_mode: boolean; + operation_mode?: "dry_run" | "apply"; + duration_ms?: number; + error_category?: string; + model_family?: string; + retry_profile?: string; + http_status?: number; + [key: string]: unknown; +} + +export const OPERATION_EVENT_VERSION = "1.0" as const; + +interface ReadAuditEntriesOptions { + sinceMs?: number; + limit?: number; +} + const DEFAULT_CONFIG: AuditConfig = { enabled: true, logDir: join(homedir(), ".opencode", "logs"), @@ -187,3 +241,40 @@ export function listAuditLogFiles(): string[] { .map((f) => join(auditConfig.logDir, f)) .sort(); } + +export function readAuditEntries(options: ReadAuditEntriesOptions = {}): AuditEntry[] { + const { sinceMs, limit } = options; + const minTimestamp = typeof sinceMs === "number" ? sinceMs : null; + const files = listAuditLogFiles(); + const parsedEntries: AuditEntry[] = []; + + for (const filePath of files) { + let content = ""; + try { + content = readFileSync(filePath, "utf8"); + } catch { + continue; + } + + const lines = content.split("\n"); + for (const rawLine of lines) { + const line = rawLine.trim(); + if (!line) continue; + try { + const entry = JSON.parse(line) as AuditEntry; + const parsedTime = Date.parse(entry.timestamp); + if (!Number.isFinite(parsedTime)) continue; + if (minTimestamp !== null && parsedTime < minTimestamp) continue; + parsedEntries.push(entry); + } catch { + // Ignore malformed lines; audit reads should be best-effort. + } + } + } + + parsedEntries.sort((a, b) => Date.parse(a.timestamp) - Date.parse(b.timestamp)); + if (typeof limit === "number" && limit > 0 && parsedEntries.length > limit) { + return parsedEntries.slice(parsedEntries.length - limit); + } + return parsedEntries; +} diff --git a/test/audit.test.ts b/test/audit.test.ts index b0403817..b0672b7b 100644 --- a/test/audit.test.ts +++ b/test/audit.test.ts @@ -10,6 +10,7 @@ import { getAuditConfig, getAuditLogPath, listAuditLogFiles, + readAuditEntries, } from "../lib/audit.js"; describe("Audit logging", () => { @@ -247,6 +248,60 @@ describe("Audit logging", () => { }); }); + describe("readAuditEntries", () => { + it("returns parsed entries filtered by sinceMs", () => { + const now = Date.now(); + auditLog( + AuditAction.OPERATION_START, + "actor", + "request.fetch", + AuditOutcome.PARTIAL, + { + event_version: "1.0", + operation_id: "old", + process_session_id: "p1", + operation_class: "request", + operation_name: "request.fetch", + attempt_no: 1, + retry_count: 0, + manual_recovery_required: false, + beginner_safe_mode: false, + }, + ); + + const sinceMs = now - 1000; + auditLog( + AuditAction.OPERATION_SUCCESS, + "actor", + "request.fetch", + AuditOutcome.SUCCESS, + { + event_version: "1.0", + operation_id: "new", + process_session_id: "p1", + operation_class: "request", + operation_name: "request.fetch", + attempt_no: 2, + retry_count: 1, + manual_recovery_required: false, + beginner_safe_mode: false, + }, + ); + + const entries = readAuditEntries({ sinceMs }); + expect(entries.length).toBeGreaterThanOrEqual(1); + expect(entries.some((entry) => entry.action === AuditAction.OPERATION_SUCCESS)).toBe(true); + }); + + it("respects the limit option", () => { + auditLog(AuditAction.ACCOUNT_ADD, "actor", "r1", AuditOutcome.SUCCESS); + auditLog(AuditAction.ACCOUNT_REMOVE, "actor", "r2", AuditOutcome.SUCCESS); + + const entries = readAuditEntries({ limit: 1 }); + expect(entries).toHaveLength(1); + }); + }); + describe("AuditAction enum", () => { it("should have all expected actions", () => { expect(AuditAction.ACCOUNT_ADD).toBe("account.add"); @@ -254,6 +309,11 @@ describe("Audit logging", () => { expect(AuditAction.CONFIG_LOAD).toBe("config.load"); expect(AuditAction.REQUEST_START).toBe("request.start"); expect(AuditAction.CIRCUIT_OPEN).toBe("circuit.open"); + expect(AuditAction.OPERATION_START).toBe("operation.start"); + expect(AuditAction.OPERATION_SUCCESS).toBe("operation.success"); + expect(AuditAction.OPERATION_FAILURE).toBe("operation.failure"); + expect(AuditAction.OPERATION_RETRY).toBe("operation.retry"); + expect(AuditAction.OPERATION_RECOVERY).toBe("operation.recovery"); }); }); diff --git a/test/index.test.ts b/test/index.test.ts index 02e79061..8bd1332f 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -265,6 +265,24 @@ vi.mock("../lib/storage.js", () => ({ formatStorageErrorHint: () => "Check file permissions", })); +vi.mock("../lib/audit.js", () => ({ + AuditAction: { + OPERATION_START: "operation.start", + OPERATION_SUCCESS: "operation.success", + OPERATION_FAILURE: "operation.failure", + OPERATION_RETRY: "operation.retry", + OPERATION_RECOVERY: "operation.recovery", + }, + AuditOutcome: { + SUCCESS: "success", + FAILURE: "failure", + PARTIAL: "partial", + }, + OPERATION_EVENT_VERSION: "1.0", + auditLog: vi.fn(), + readAuditEntries: vi.fn(() => []), +})); + vi.mock("../lib/accounts.js", () => { class MockAccountManager { private accounts = [ @@ -693,6 +711,190 @@ describe("OpenAIOAuthPlugin", () => { const result = await plugin.tool["codex-metrics"].execute(); expect(result).toContain("Codex Plugin Metrics"); expect(result).toContain("Total upstream requests"); + expect(result).toContain("Local reliability KPIs"); + expect(result).toContain("retention-bounded"); + }); + + it("renders best-effort 24h reliability percentages from local audit events", async () => { + const auditModule = await import("../lib/audit.js"); + const readAuditEntriesMock = vi.mocked(auditModule.readAuditEntries); + const timestamp = new Date().toISOString(); + readAuditEntriesMock.mockReturnValue([ + { + timestamp, + correlationId: null, + action: "operation.start", + actor: "plugin", + resource: "request.fetch", + outcome: "partial", + metadata: { + event_version: "1.0", + operation_id: "req-1", + process_session_id: "proc-1", + operation_class: "request", + operation_name: "request.fetch", + attempt_no: 1, + retry_count: 0, + manual_recovery_required: false, + beginner_safe_mode: false, + request_flow_id: "flow-1", + }, + }, + { + timestamp, + correlationId: null, + action: "operation.success", + actor: "plugin", + resource: "request.fetch", + outcome: "success", + metadata: { + event_version: "1.0", + operation_id: "req-1", + process_session_id: "proc-1", + operation_class: "request", + operation_name: "request.fetch", + attempt_no: 1, + retry_count: 0, + manual_recovery_required: false, + beginner_safe_mode: false, + request_flow_id: "flow-1", + }, + }, + { + timestamp, + correlationId: null, + action: "operation.start", + actor: "plugin", + resource: "auth.refresh-token", + outcome: "partial", + metadata: { + event_version: "1.0", + operation_id: "auth-1", + process_session_id: "proc-1", + operation_class: "auth", + operation_name: "auth.refresh-token", + attempt_no: 1, + retry_count: 0, + manual_recovery_required: false, + beginner_safe_mode: false, + }, + }, + { + timestamp, + correlationId: null, + action: "operation.success", + actor: "plugin", + resource: "auth.refresh-token", + outcome: "success", + metadata: { + event_version: "1.0", + operation_id: "auth-1", + process_session_id: "proc-1", + operation_class: "auth", + operation_name: "auth.refresh-token", + attempt_no: 1, + retry_count: 0, + manual_recovery_required: false, + beginner_safe_mode: false, + }, + }, + ]); + + const result = await plugin.tool["codex-metrics"].execute(); + expect(result).toContain("Uninterrupted completion rate: 100.0%"); + expect(result).toContain("First-attempt success rate: 100.0%"); + expect(result).toContain("Token refresh success rate: 100.0%"); + }); + + it("excludes request.exhausted from class-level request success denominator", async () => { + const auditModule = await import("../lib/audit.js"); + const readAuditEntriesMock = vi.mocked(auditModule.readAuditEntries); + const timestamp = new Date().toISOString(); + readAuditEntriesMock.mockReturnValue([ + { + timestamp, + correlationId: null, + action: "operation.start", + actor: "plugin", + resource: "request.fetch", + outcome: "partial", + metadata: { + event_version: "1.0", + operation_id: "req-1", + process_session_id: "proc-1", + operation_class: "request", + operation_name: "request.fetch", + attempt_no: 1, + retry_count: 0, + manual_recovery_required: false, + beginner_safe_mode: false, + request_flow_id: "flow-1", + }, + }, + { + timestamp, + correlationId: null, + action: "operation.success", + actor: "plugin", + resource: "request.fetch", + outcome: "success", + metadata: { + event_version: "1.0", + operation_id: "req-1", + process_session_id: "proc-1", + operation_class: "request", + operation_name: "request.fetch", + attempt_no: 1, + retry_count: 0, + manual_recovery_required: false, + beginner_safe_mode: false, + request_flow_id: "flow-1", + }, + }, + { + timestamp, + correlationId: null, + action: "operation.start", + actor: "plugin", + resource: "request.exhausted", + outcome: "partial", + metadata: { + event_version: "1.0", + operation_id: "req-x", + process_session_id: "proc-1", + operation_class: "request", + operation_name: "request.exhausted", + attempt_no: 2, + retry_count: 1, + manual_recovery_required: true, + beginner_safe_mode: false, + request_flow_id: "flow-1", + }, + }, + { + timestamp, + correlationId: null, + action: "operation.failure", + actor: "plugin", + resource: "request.exhausted", + outcome: "failure", + metadata: { + event_version: "1.0", + operation_id: "req-x", + process_session_id: "proc-1", + operation_class: "request", + operation_name: "request.exhausted", + attempt_no: 2, + retry_count: 1, + manual_recovery_required: true, + beginner_safe_mode: false, + request_flow_id: "flow-1", + }, + }, + ]); + + const result = await plugin.tool["codex-metrics"].execute(); + expect(result).toContain("Operation success by class: request=100.0%"); }); }); From 6322ce58f22bb2aac75688c551dfc137cdc4efc6 Mon Sep 17 00:00:00 2001 From: Neil Daquioag <405533+ndycode@users.noreply.github.com> Date: Sun, 1 Mar 2026 13:01:02 +0800 Subject: [PATCH 2/3] fix: make operation sequence generation atomic Address Greptile feedback by replacing non-atomic local increment with an Atomics-backed shared counter for operation id sequencing. Co-authored-by: Codex --- index.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/index.ts b/index.ts index bd7ba9bb..ed71eea4 100644 --- a/index.ts +++ b/index.ts @@ -291,7 +291,7 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { }; const processSessionId = randomUUID(); - let operationSequence = 0; + const operationSequenceCounter = new Int32Array(new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT)); type OperationTracker = { operationId: string; @@ -339,9 +339,11 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { const toPercent = (numerator: number, denominator: number): number | null => denominator > 0 ? (numerator / denominator) * 100 : null; + const nextOperationSequence = (): number => Atomics.add(operationSequenceCounter, 0, 1) + 1; + const createOperationId = (operationClass: OperationClass): string => { - operationSequence += 1; - return `${operationClass}-${Date.now()}-${operationSequence}-${randomUUID().slice(0, 8)}`; + const sequence = nextOperationSequence(); + return `${operationClass}-${Date.now()}-${sequence}-${randomUUID().slice(0, 8)}`; }; const buildOperationMetadata = ( From 0c644376062698f9076083ace826b6c85935fc2c Mon Sep 17 00:00:00 2001 From: Neil Daquioag <405533+ndycode@users.noreply.github.com> Date: Sun, 1 Mar 2026 13:16:30 +0800 Subject: [PATCH 3/3] fix: address CodeRabbit reliability and docs findings Harden audit log read paths, bound in-memory queue growth on persistent write failures, remove undocumented codex-sync command from README, and strengthen readAuditEntries tests. Co-authored-by: Codex --- README.md | 1 - lib/audit.ts | 20 +++++++++++-- test/audit.test.ts | 74 ++++++++++++++++++++++------------------------ 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 58e9c4b4..c41e8569 100644 --- a/README.md +++ b/README.md @@ -554,7 +554,6 @@ codex-dashboard | `codex-remove` | Remove account entry | `codex-remove index=3` | | `codex-export` | Export account backups | `codex-export` | | `codex-import` | Dry-run or apply imports | `codex-import path="~/backup/accounts.json" dryRun=true` | -| `codex-sync` | Manual bidirectional sync with Codex CLI auth | `codex-sync direction="pull"` | --- diff --git a/lib/audit.ts b/lib/audit.ts index 39216013..94ec56eb 100644 --- a/lib/audit.ts +++ b/lib/audit.ts @@ -12,6 +12,15 @@ import { // Simple in-memory queue to prevent EBUSY locks during highly concurrent writes const logQueue: string[] = []; let isFlushing = false; +const MAX_LOG_QUEUE_ITEMS = 5000; + +function trimQueueToLimit(context: string): void { + if (logQueue.length <= MAX_LOG_QUEUE_ITEMS) return; + + const dropped = logQueue.length - MAX_LOG_QUEUE_ITEMS; + logQueue.splice(0, dropped); + console.warn(`[AuditLog] Dropped ${dropped} queued entries (${context}) to bound memory usage.`); +} function flushLogQueue(logPath: string): void { if (isFlushing || logQueue.length === 0) return; @@ -26,6 +35,7 @@ function flushLogQueue(logPath: string): void { // If the file is locked by an external process (e.g. antivirus), // we unshift the items back to the front of the queue to try again later logQueue.unshift(...itemsToFlush); + trimQueueToLimit("flush-failure"); console.error("[AuditLog] Failed to flush queue, retaining items:", error); } finally { isFlushing = false; @@ -221,8 +231,9 @@ export function auditLog( const logPath = getLogFilePath(); const line = JSON.stringify(entry) + "\n"; - + logQueue.push(line); + trimQueueToLimit("append"); flushLogQueue(logPath); } catch { // Audit logging should never break the application @@ -245,7 +256,12 @@ export function listAuditLogFiles(): string[] { export function readAuditEntries(options: ReadAuditEntriesOptions = {}): AuditEntry[] { const { sinceMs, limit } = options; const minTimestamp = typeof sinceMs === "number" ? sinceMs : null; - const files = listAuditLogFiles(); + let files: string[] = []; + try { + files = listAuditLogFiles(); + } catch { + return []; + } const parsedEntries: AuditEntry[] = []; for (const filePath of files) { diff --git a/test/audit.test.ts b/test/audit.test.ts index b0672b7b..9596b9b4 100644 --- a/test/audit.test.ts +++ b/test/audit.test.ts @@ -251,46 +251,32 @@ describe("Audit logging", () => { describe("readAuditEntries", () => { it("returns parsed entries filtered by sinceMs", () => { const now = Date.now(); - auditLog( - AuditAction.OPERATION_START, - "actor", - "request.fetch", - AuditOutcome.PARTIAL, - { - event_version: "1.0", - operation_id: "old", - process_session_id: "p1", - operation_class: "request", - operation_name: "request.fetch", - attempt_no: 1, - retry_count: 0, - manual_recovery_required: false, - beginner_safe_mode: false, - }, - ); - - const sinceMs = now - 1000; - auditLog( - AuditAction.OPERATION_SUCCESS, - "actor", - "request.fetch", - AuditOutcome.SUCCESS, - { - event_version: "1.0", - operation_id: "new", - process_session_id: "p1", - operation_class: "request", - operation_name: "request.fetch", - attempt_no: 2, - retry_count: 1, - manual_recovery_required: false, - beginner_safe_mode: false, - }, + const logPath = getAuditLogPath(); + writeFileSync( + logPath, + [ + JSON.stringify({ + timestamp: new Date(now - 60_000).toISOString(), + correlationId: null, + action: AuditAction.OPERATION_START, + actor: "actor", + resource: "request.fetch", + outcome: AuditOutcome.PARTIAL, + }), + JSON.stringify({ + timestamp: new Date(now).toISOString(), + correlationId: null, + action: AuditAction.OPERATION_SUCCESS, + actor: "actor", + resource: "request.fetch", + outcome: AuditOutcome.SUCCESS, + }), + ].join("\n") + "\n", ); - const entries = readAuditEntries({ sinceMs }); - expect(entries.length).toBeGreaterThanOrEqual(1); - expect(entries.some((entry) => entry.action === AuditAction.OPERATION_SUCCESS)).toBe(true); + const entries = readAuditEntries({ sinceMs: now - 1000 }); + expect(entries).toHaveLength(1); + expect(entries[0]?.action).toBe(AuditAction.OPERATION_SUCCESS); }); it("respects the limit option", () => { @@ -299,6 +285,18 @@ describe("Audit logging", () => { const entries = readAuditEntries({ limit: 1 }); expect(entries).toHaveLength(1); + expect(entries[0]?.action).toBe(AuditAction.ACCOUNT_REMOVE); + }); + + it("returns empty array when audit files cannot be listed", () => { + const badPath = join(testLogDir, "blocked.log"); + writeFileSync(badPath, "not-a-directory", "utf8"); + configureAudit({ + enabled: true, + logDir: badPath, + }); + + expect(readAuditEntries()).toEqual([]); }); });